blob: 14aee653b3b62f0ec676c75e65d97bfab10d01d4 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000011#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#include "config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000013#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014
Daniel Veillard260a68f1998-08-13 03:39:55 +000015#include <stdio.h>
Daniel Veillard32bc74e2000-07-14 14:49:25 +000016#include <string.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000017#ifdef HAVE_CTYPE_H
18#include <ctype.h>
19#endif
20#ifdef HAVE_STDLIB_H
Seth Alvese7f12e61998-10-01 20:51:15 +000021#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#endif
23#ifdef HAVE_SYS_STAT_H
Daniel Veillard260a68f1998-08-13 03:39:55 +000024#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000025#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +000026#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32#ifdef HAVE_ZLIB_H
33#include <zlib.h>
34#endif
35
Daniel Veillard361d8452000-04-03 19:48:13 +000036#include <libxml/xmlmemory.h>
37#include <libxml/tree.h>
38#include <libxml/parser.h>
39#include <libxml/entities.h>
40#include <libxml/encoding.h>
41#include <libxml/valid.h>
42#include <libxml/parserInternals.h>
43#include <libxml/xmlIO.h>
Daniel Veillard496a1cf2000-05-03 14:20:55 +000044#include <libxml/uri.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000045#include "xml-error.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000046
/*
 * Buffer size constants.
 * NOTE(review): neither constant is referenced in the visible part of this
 * file; presumably they size temporary parsing buffers - confirm at the
 * points of use.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Global default, initialised to 1.
 * NOTE(review): presumably controls whether parser warnings are reported
 * by default - confirm where it is read.
 */
int xmlGetWarningsDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/*
 * Forward declarations for routines referenced by the parsing macros
 * defined later in this file (SKIP and NEXTL call the two handlers).
 */
void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);
Daniel Veillard87b95392000-08-12 21:12:04 +000065/************************************************************************
66 * *
67 * Version and Features handling *
68 * *
69 ************************************************************************/
Daniel Veillardbe803962000-06-28 23:40:59 +000070const char *xmlParserVersion = LIBXML_VERSION_STRING;
71
72/*
73 * xmlCheckVersion:
74 * @version: the include version number
75 *
76 * check the compiled lib version against the include one.
77 * This can warn or immediately kill the application
78 */
79void
80xmlCheckVersion(int version) {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000081 int myversion = (int) LIBXML_VERSION;
Daniel Veillardbe803962000-06-28 23:40:59 +000082
83 if ((myversion / 10000) != (version / 10000)) {
84 fprintf(stderr,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
87 exit(1);
88 }
89 if ((myversion / 100) < (version / 100)) {
90 fprintf(stderr,
91 "Warning: program compiled against libxml %d using older %d\n",
92 (version / 100), (myversion / 100));
93 }
94}
95
96
/*
 * Names of the features understood by the feature API.
 * The order of the entries is the order reported by xmlGetFeaturesList().
 */
const char *xmlFeaturesList[] = {
    /* parser context settings and state */
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of strings to be filled with the feature names.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * If @len or @result is NULL nothing is copied and only the total is
 * returned. A *@len which is negative or >= 1000 is rejected.
 *
 * Returns -1 in case of error, or the total number of features,
 *            len is updated with the number of strings copied,
 *            strings must not be deallocated
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
    int pos = 0;

    if (len == NULL || result == NULL)
        return(total);
    if (*len < 0 || *len >= 1000)
        return(-1);

    /* never hand out more entries than the table holds */
    if (*len > total)
        *len = total;

    while (pos < *len) {
        result[pos] = xmlFeaturesList[pos];
        pos++;
    }
    return(total);
}
167
168/*
169 * xmlGetFeature:
170 * @ctxt: an XML/HTML parser context
171 * @name: the feature name
172 * @result: location to store the result
173 *
174 * Read the current value of one feature of this parser instance
175 *
176 * Returns -1 in case or error, 0 otherwise
177 */
178int
179xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181 return(-1);
182
183 if (!strcmp(name, "validate")) {
184 *((int *) result) = ctxt->validate;
185 } else if (!strcmp(name, "keep blanks")) {
186 *((int *) result) = ctxt->keepBlanks;
187 } else if (!strcmp(name, "disable SAX")) {
188 *((int *) result) = ctxt->disableSAX;
189 } else if (!strcmp(name, "fetch external entities")) {
190 *((int *) result) = ctxt->validate;
191 } else if (!strcmp(name, "substitute entities")) {
192 *((int *) result) = ctxt->replaceEntities;
193 } else if (!strcmp(name, "gather line info")) {
194 *((int *) result) = ctxt->record_info;
195 } else if (!strcmp(name, "user data")) {
196 *((void **)result) = ctxt->userData;
197 } else if (!strcmp(name, "is html")) {
198 *((int *) result) = ctxt->html;
199 } else if (!strcmp(name, "is standalone")) {
200 *((int *) result) = ctxt->standalone;
201 } else if (!strcmp(name, "document")) {
202 *((xmlDocPtr *) result) = ctxt->myDoc;
203 } else if (!strcmp(name, "is well formed")) {
204 *((int *) result) = ctxt->wellFormed;
205 } else if (!strcmp(name, "is valid")) {
206 *((int *) result) = ctxt->valid;
207 } else if (!strcmp(name, "SAX block")) {
208 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209 } else if (!strcmp(name, "SAX function internalSubset")) {
210 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211 } else if (!strcmp(name, "SAX function isStandalone")) {
212 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217 } else if (!strcmp(name, "SAX function resolveEntity")) {
218 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219 } else if (!strcmp(name, "SAX function getEntity")) {
220 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221 } else if (!strcmp(name, "SAX function entityDecl")) {
222 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223 } else if (!strcmp(name, "SAX function notationDecl")) {
224 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225 } else if (!strcmp(name, "SAX function attributeDecl")) {
226 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227 } else if (!strcmp(name, "SAX function elementDecl")) {
228 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233 } else if (!strcmp(name, "SAX function startDocument")) {
234 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235 } else if (!strcmp(name, "SAX function endDocument")) {
236 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237 } else if (!strcmp(name, "SAX function startElement")) {
238 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239 } else if (!strcmp(name, "SAX function endElement")) {
240 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241 } else if (!strcmp(name, "SAX function reference")) {
242 *((referenceSAXFunc *) result) = ctxt->sax->reference;
243 } else if (!strcmp(name, "SAX function characters")) {
244 *((charactersSAXFunc *) result) = ctxt->sax->characters;
245 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247 } else if (!strcmp(name, "SAX function processingInstruction")) {
248 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249 } else if (!strcmp(name, "SAX function comment")) {
250 *((commentSAXFunc *) result) = ctxt->sax->comment;
251 } else if (!strcmp(name, "SAX function warning")) {
252 *((warningSAXFunc *) result) = ctxt->sax->warning;
253 } else if (!strcmp(name, "SAX function error")) {
254 *((errorSAXFunc *) result) = ctxt->sax->error;
255 } else if (!strcmp(name, "SAX function fatalError")) {
256 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257 } else if (!strcmp(name, "SAX function getParameterEntity")) {
258 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259 } else if (!strcmp(name, "SAX function cdataBlock")) {
260 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261 } else if (!strcmp(name, "SAX function externalSubset")) {
262 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263 } else {
264 return(-1);
265 }
266 return(0);
267}
268
269/*
270 * xmlSetFeature:
271 * @ctxt: an XML/HTML parser context
272 * @name: the feature name
273 * @value: pointer to the location of the new value
274 *
275 * Change the current value of one feature of this parser instance
276 *
277 * Returns -1 in case or error, 0 otherwise
278 */
279int
280xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282 return(-1);
283
284 if (!strcmp(name, "validate")) {
285 ctxt->validate = *((int *) value);
286 } else if (!strcmp(name, "keep blanks")) {
287 ctxt->keepBlanks = *((int *) value);
288 } else if (!strcmp(name, "disable SAX")) {
289 ctxt->disableSAX = *((int *) value);
290 } else if (!strcmp(name, "fetch external entities")) {
291 int newvalid = *((int *) value);
292 if ((!ctxt->validate) && (newvalid != 0)) {
293 if (ctxt->vctxt.warning == NULL)
294 ctxt->vctxt.warning = xmlParserValidityWarning;
295 if (ctxt->vctxt.error == NULL)
296 ctxt->vctxt.error = xmlParserValidityError;
297 /* Allocate the Node stack */
298 ctxt->vctxt.nodeTab = (xmlNodePtr *)
299 xmlMalloc(4 * sizeof(xmlNodePtr));
300 ctxt->vctxt.nodeNr = 0;
301 ctxt->vctxt.nodeMax = 4;
302 ctxt->vctxt.node = NULL;
303 }
304 ctxt->validate = newvalid;
305 } else if (!strcmp(name, "substitute entities")) {
306 ctxt->replaceEntities = *((int *) value);
307 } else if (!strcmp(name, "gather line info")) {
308 ctxt->record_info = *((int *) value);
309 } else if (!strcmp(name, "user data")) {
310 ctxt->userData = *((void **)value);
311 } else if (!strcmp(name, "is html")) {
312 ctxt->html = *((int *) value);
313 } else if (!strcmp(name, "is standalone")) {
314 ctxt->standalone = *((int *) value);
315 } else if (!strcmp(name, "document")) {
316 ctxt->myDoc = *((xmlDocPtr *) value);
317 } else if (!strcmp(name, "is well formed")) {
318 ctxt->wellFormed = *((int *) value);
319 } else if (!strcmp(name, "is valid")) {
320 ctxt->valid = *((int *) value);
321 } else if (!strcmp(name, "SAX block")) {
322 ctxt->sax = *((xmlSAXHandlerPtr *) value);
323 } else if (!strcmp(name, "SAX function internalSubset")) {
324 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
325 } else if (!strcmp(name, "SAX function isStandalone")) {
326 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
328 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
330 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function resolveEntity")) {
332 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
333 } else if (!strcmp(name, "SAX function getEntity")) {
334 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
335 } else if (!strcmp(name, "SAX function entityDecl")) {
336 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function notationDecl")) {
338 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function attributeDecl")) {
340 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function elementDecl")) {
342 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
344 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
346 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function startDocument")) {
348 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function endDocument")) {
350 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function startElement")) {
352 ctxt->sax->startElement = *((startElementSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function endElement")) {
354 ctxt->sax->endElement = *((endElementSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function reference")) {
356 ctxt->sax->reference = *((referenceSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function characters")) {
358 ctxt->sax->characters = *((charactersSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
360 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function processingInstruction")) {
362 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function comment")) {
364 ctxt->sax->comment = *((commentSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function warning")) {
366 ctxt->sax->warning = *((warningSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function error")) {
368 ctxt->sax->error = *((errorSAXFunc *) value);
369 } else if (!strcmp(name, "SAX function fatalError")) {
370 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
371 } else if (!strcmp(name, "SAX function getParameterEntity")) {
372 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
373 } else if (!strcmp(name, "SAX function cdataBlock")) {
374 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
375 } else if (!strcmp(name, "SAX function externalSubset")) {
376 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
377 } else {
378 return(-1);
379 }
380 return(0);
381}
382
383
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000384/************************************************************************
385 * *
386 * Input handling functions for progressive parsing *
387 * *
388 ************************************************************************/
389
390/* #define DEBUG_INPUT */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000391/* #define DEBUG_STACK */
392/* #define DEBUG_PUSH */
393
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000394
#define INPUT_CHUNK	250
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports on stderr any inconsistency between the input's base/cur
 * pointers and its underlying buffer, then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates on
     * platforms where pointers are wider than int. The cur offset is a
     * ptrdiff_t and must be converted explicitly to match %d.
     */
    fprintf(stderr,"buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
420
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000421
422/**
423 * xmlParserInputRead:
424 * @in: an XML parser input
425 * @len: an indicative size for the lookahead
426 *
427 * This function refresh the input for the parser. It doesn't try to
428 * preserve pointers to the input buffer, and discard already read data
429 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000430 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000431 * end of this entity
432 */
433int
434xmlParserInputRead(xmlParserInputPtr in, int len) {
435 int ret;
436 int used;
437 int index;
438
439#ifdef DEBUG_INPUT
440 fprintf(stderr, "Read\n");
441#endif
442 if (in->buf == NULL) return(-1);
443 if (in->base == NULL) return(-1);
444 if (in->cur == NULL) return(-1);
445 if (in->buf->buffer == NULL) return(-1);
446
447 CHECK_BUFFER(in);
448
449 used = in->cur - in->buf->buffer->content;
450 ret = xmlBufferShrink(in->buf->buffer, used);
451 if (ret > 0) {
452 in->cur -= ret;
453 in->consumed += ret;
454 }
455 ret = xmlParserInputBufferRead(in->buf, len);
456 if (in->base != in->buf->buffer->content) {
457 /*
458 * the buffer has been realloced
459 */
460 index = in->cur - in->base;
461 in->base = in->buf->buffer->content;
462 in->cur = &in->buf->buffer->content[index];
463 }
464
465 CHECK_BUFFER(in);
466
467 return(ret);
468}
469
470/**
471 * xmlParserInputGrow:
472 * @in: an XML parser input
473 * @len: an indicative size for the lookahead
474 *
475 * This function increase the input for the parser. It tries to
476 * preserve pointers to the input buffer, and keep already read data
477 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000478 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000479 * end of this entity
480 */
481int
482xmlParserInputGrow(xmlParserInputPtr in, int len) {
483 int ret;
484 int index;
485
486#ifdef DEBUG_INPUT
487 fprintf(stderr, "Grow\n");
488#endif
489 if (in->buf == NULL) return(-1);
490 if (in->base == NULL) return(-1);
491 if (in->cur == NULL) return(-1);
492 if (in->buf->buffer == NULL) return(-1);
493
494 CHECK_BUFFER(in);
495
496 index = in->cur - in->base;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000497 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000498
499 CHECK_BUFFER(in);
500
501 return(0);
502 }
Daniel Veillard5d211f42000-04-07 17:00:24 +0000503 if (in->buf->readcallback != NULL)
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000504 ret = xmlParserInputBufferGrow(in->buf, len);
505 else
506 return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000507
508 /*
509 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
510 * block, but we use it really as an integer to do some
511 * pointer arithmetic. Insure will raise it as a bug but in
512 * that specific case, that's not !
513 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000514 if (in->base != in->buf->buffer->content) {
515 /*
516 * the buffer has been realloced
517 */
518 index = in->cur - in->base;
519 in->base = in->buf->buffer->content;
520 in->cur = &in->buf->buffer->content[index];
521 }
522
523 CHECK_BUFFER(in);
524
525 return(ret);
526}
527
528/**
529 * xmlParserInputShrink:
530 * @in: an XML parser input
531 *
532 * This function removes used input for the parser.
533 */
534void
535xmlParserInputShrink(xmlParserInputPtr in) {
536 int used;
537 int ret;
538 int index;
539
540#ifdef DEBUG_INPUT
541 fprintf(stderr, "Shrink\n");
542#endif
543 if (in->buf == NULL) return;
544 if (in->base == NULL) return;
545 if (in->cur == NULL) return;
546 if (in->buf->buffer == NULL) return;
547
548 CHECK_BUFFER(in);
549
550 used = in->cur - in->buf->buffer->content;
551 if (used > INPUT_CHUNK) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000552 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000553 if (ret > 0) {
554 in->cur -= ret;
555 in->consumed += ret;
556 }
557 }
558
559 CHECK_BUFFER(in);
560
561 if (in->buf->buffer->use > INPUT_CHUNK) {
562 return;
563 }
564 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
565 if (in->base != in->buf->buffer->content) {
566 /*
567 * the buffer has been realloced
568 */
569 index = in->cur - in->base;
570 in->base = in->buf->buffer->content;
571 in->cur = &in->buf->buffer->content[index];
572 }
573
574 CHECK_BUFFER(in);
575}
576
Daniel Veillard260a68f1998-08-13 03:39:55 +0000577/************************************************************************
578 * *
579 * Parser stacks related functions and macros *
580 * *
581 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000582
/*
 * Global default values, initialised here.
 * NOTE(review): presumably copied into new parser contexts (entity
 * substitution off, validity checking off, blanks kept) - they are not
 * read in the visible part of this file, confirm at the points of use.
 */
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
/* Forward declaration; the definition is not in the visible part of the file */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000588
Daniel Veillard260a68f1998-08-13 03:39:55 +0000589/*
590 * Generic function for accessing stacks in the Parser Context
591 */
592
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000593#define PUSH_AND_POP(scope, type, name) \
594scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000595 if (ctxt->name##Nr >= ctxt->name##Max) { \
596 ctxt->name##Max *= 2; \
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000597 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000598 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
599 if (ctxt->name##Tab == NULL) { \
600 fprintf(stderr, "realloc failed !\n"); \
Daniel Veillard0142b842000-01-14 14:45:24 +0000601 return(0); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000602 } \
603 } \
604 ctxt->name##Tab[ctxt->name##Nr] = value; \
605 ctxt->name = value; \
606 return(ctxt->name##Nr++); \
607} \
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000608scope type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000609 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000610 if (ctxt->name##Nr <= 0) return(0); \
611 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000612 if (ctxt->name##Nr > 0) \
613 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
614 else \
615 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000616 ret = ctxt->name##Tab[ctxt->name##Nr]; \
617 ctxt->name##Tab[ctxt->name##Nr] = 0; \
618 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000619} \
620
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000621PUSH_AND_POP(extern, xmlParserInputPtr, input)
622PUSH_AND_POP(extern, xmlNodePtr, node)
623PUSH_AND_POP(extern, xmlChar*, name)
Daniel Veillard260a68f1998-08-13 03:39:55 +0000624
Daniel Veillardcf461992000-03-14 18:30:20 +0000625int spacePush(xmlParserCtxtPtr ctxt, int val) {
626 if (ctxt->spaceNr >= ctxt->spaceMax) {
627 ctxt->spaceMax *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000628 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
Daniel Veillardcf461992000-03-14 18:30:20 +0000629 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
630 if (ctxt->spaceTab == NULL) {
631 fprintf(stderr, "realloc failed !\n");
632 return(0);
633 }
634 }
635 ctxt->spaceTab[ctxt->spaceNr] = val;
636 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
637 return(ctxt->spaceNr++);
638}
639
640int spacePop(xmlParserCtxtPtr ctxt) {
641 int ret;
642 if (ctxt->spaceNr <= 0) return(0);
643 ctxt->spaceNr--;
644 if (ctxt->spaceNr > 0)
645 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
646 else
647 ctxt->space = NULL;
648 ret = ctxt->spaceTab[ctxt->spaceNr];
649 ctxt->spaceTab[ctxt->spaceNr] = -1;
650 return(ret);
651}
652
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be in
 * scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 */

/*
 * RAW evaluates to -1 while a token is pending in ctxt->token, otherwise
 * to the xmlChar at the current input position.
 * CUR evaluates to the pending token when there is one, otherwise to the
 * xmlChar at the current input position.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances the input by val xmlChars, then hands a '%' or '&' found
 * at the new position to the PE reference / reference handlers, and pops
 * the input when it is exhausted and cannot be grown.
 *
 * SKIP, SHRINK and GROW each expand to several statements without an
 * enclosing block and without a final ';': the caller supplies the ';',
 * and they must not be used as the unbraced body of an if/else/loop.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* SHRINK discards used input, then pops the input if nothing is left */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* GROW tries to read more input, then pops the input if nothing is left */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Note: the expansions of SKIP_BLANKS and NEXT already end with a ';' */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/*
 * NEXTL(l) updates the line/column counters for the current xmlChar,
 * clears any pending token, advances the input by l and hands a '%' or
 * '&' found at the new position to the matching handler.
 * It expands to several unbraced statements (see the note on SKIP) and
 * its expansion already ends with a ';'.
 */
#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

/*
 * CUR_CHAR and CUR_SCHAR take the address of their l argument, so l must
 * be an lvalue; both expansions already end with a ';'.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * COPY_BUF(l,b,i,v) stores v in buffer b at index i and advances i: by
 * one when l is 1, otherwise by the value xmlCopyChar() returns.
 * It expands to an unbraced if/else ending with a ';' and evaluates its
 * arguments more than once.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);
720
721/**
722 * xmlNextChar:
723 * @ctxt: the XML parser context
724 *
725 * Skip to the next char input char.
726 */
727
728void
729xmlNextChar(xmlParserCtxtPtr ctxt) {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000730 if (ctxt->instate == XML_PARSER_EOF)
731 return;
732
Daniel Veillardcf461992000-03-14 18:30:20 +0000733 /*
734 * TODO: 2.11 End-of-Line Handling
735 * the literal two-character sequence "#xD#xA" or a standalone
736 * literal #xD, an XML processor must pass to the application
737 * the single character #xA.
738 */
739 if (ctxt->token != 0) ctxt->token = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000740 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000741 if ((*ctxt->input->cur == 0) &&
742 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
743 (ctxt->instate != XML_PARSER_COMMENT)) {
744 /*
745 * If we are at the end of the current entity and
746 * the context allows it, we pop consumed entities
747 * automatically.
748 * TODO: the auto closing should be blocked in other cases
749 */
750 xmlPopInput(ctxt);
751 } else {
752 if (*(ctxt->input->cur) == '\n') {
753 ctxt->input->line++; ctxt->input->col = 1;
754 } else ctxt->input->col++;
Daniel Veillardbe803962000-06-28 23:40:59 +0000755 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000756 /*
757 * We are supposed to handle UTF8, check it's valid
758 * From rfc2044: encoding of the Unicode values on UTF-8:
759 *
760 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
761 * 0000 0000-0000 007F 0xxxxxxx
762 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
763 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
764 *
765 * Check for the 0x110000 limit too
766 */
767 const unsigned char *cur = ctxt->input->cur;
768 unsigned char c;
769
770 c = *cur;
771 if (c & 0x80) {
772 if (cur[1] == 0)
773 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
774 if ((cur[1] & 0xc0) != 0x80)
775 goto encoding_error;
776 if ((c & 0xe0) == 0xe0) {
777 unsigned int val;
778
779 if (cur[2] == 0)
780 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
781 if ((cur[2] & 0xc0) != 0x80)
782 goto encoding_error;
783 if ((c & 0xf0) == 0xf0) {
784 if (cur[3] == 0)
785 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
786 if (((c & 0xf8) != 0xf0) ||
787 ((cur[3] & 0xc0) != 0x80))
788 goto encoding_error;
789 /* 4-byte code */
790 ctxt->input->cur += 4;
791 val = (cur[0] & 0x7) << 18;
792 val |= (cur[1] & 0x3f) << 12;
793 val |= (cur[2] & 0x3f) << 6;
794 val |= cur[3] & 0x3f;
795 } else {
796 /* 3-byte code */
797 ctxt->input->cur += 3;
798 val = (cur[0] & 0xf) << 12;
799 val |= (cur[1] & 0x3f) << 6;
800 val |= cur[2] & 0x3f;
801 }
802 if (((val > 0xd7ff) && (val < 0xe000)) ||
803 ((val > 0xfffd) && (val < 0x10000)) ||
804 (val >= 0x110000)) {
805 if ((ctxt->sax != NULL) &&
806 (ctxt->sax->error != NULL))
807 ctxt->sax->error(ctxt->userData,
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000808 "Char 0x%X out of allowed range\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +0000809 ctxt->errNo = XML_ERR_INVALID_ENCODING;
810 ctxt->wellFormed = 0;
811 ctxt->disableSAX = 1;
812 }
813 } else
814 /* 2-byte code */
815 ctxt->input->cur += 2;
816 } else
817 /* 1-byte code */
818 ctxt->input->cur++;
819 } else {
820 /*
821 * Assume it's a fixed lenght encoding (1) with
822 * a compatibke encoding for the ASCII set, since
823 * XML constructs only use < 128 chars
824 */
825 ctxt->input->cur++;
826 }
827 ctxt->nbChars++;
828 if (*ctxt->input->cur == 0)
829 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
830 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000831 } else {
832 ctxt->input->cur++;
833 ctxt->nbChars++;
834 if (*ctxt->input->cur == 0)
835 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillardcf461992000-03-14 18:30:20 +0000836 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000837 if ((*ctxt->input->cur == '%') && (!ctxt->html))
838 xmlParserHandlePEReference(ctxt);
839 if ((*ctxt->input->cur == '&')&& (!ctxt->html))
840 xmlParserHandleReference(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000841 if ((*ctxt->input->cur == 0) &&
842 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
843 xmlPopInput(ctxt);
844 return;
845encoding_error:
846 /*
847 * If we detect an UTF8 error that probably mean that the
848 * input encoding didn't get properly advertized in the
849 * declaration header. Report the error and switch the encoding
850 * to ISO-Latin-1 (if you don't like this policy, just declare the
851 * encoding !)
852 */
Daniel Veillardbe803962000-06-28 23:40:59 +0000853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000854 ctxt->sax->error(ctxt->userData,
855 "Input is not proper UTF-8, indicate encoding !\n");
Daniel Veillardbe803962000-06-28 23:40:59 +0000856 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
857 ctxt->input->cur[0], ctxt->input->cur[1],
858 ctxt->input->cur[2], ctxt->input->cur[3]);
859 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000860 ctxt->errNo = XML_ERR_INVALID_ENCODING;
861
Daniel Veillardbe803962000-06-28 23:40:59 +0000862 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillardcf461992000-03-14 18:30:20 +0000863 ctxt->input->cur++;
864 return;
865}
866
867/**
868 * xmlCurrentChar:
869 * @ctxt: the XML parser context
870 * @len: pointer to the length of the char read
871 *
872 * The current char value, if using UTF-8 this may actaully span multiple
873 * bytes in the input buffer. Implement the end of line normalization:
874 * 2.11 End-of-Line Handling
875 * Wherever an external parsed entity or the literal entity value
876 * of an internal parsed entity contains either the literal two-character
877 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
878 * must pass to the application the single character #xA.
879 * This behavior can conveniently be produced by normalizing all
880 * line breaks to #xA on input, before parsing.)
881 *
882 * Returns the current char value and its lenght
883 */
884
885int
886xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000887 if (ctxt->instate == XML_PARSER_EOF)
888 return(0);
889
Daniel Veillardcf461992000-03-14 18:30:20 +0000890 if (ctxt->token != 0) {
891 *len = 0;
892 return(ctxt->token);
893 }
Daniel Veillardbe803962000-06-28 23:40:59 +0000894 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000895 /*
896 * We are supposed to handle UTF8, check it's valid
897 * From rfc2044: encoding of the Unicode values on UTF-8:
898 *
899 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
900 * 0000 0000-0000 007F 0xxxxxxx
901 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
902 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
903 *
904 * Check for the 0x110000 limit too
905 */
906 const unsigned char *cur = ctxt->input->cur;
907 unsigned char c;
908 unsigned int val;
909
910 c = *cur;
911 if (c & 0x80) {
912 if (cur[1] == 0)
913 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
914 if ((cur[1] & 0xc0) != 0x80)
915 goto encoding_error;
916 if ((c & 0xe0) == 0xe0) {
917
918 if (cur[2] == 0)
919 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
920 if ((cur[2] & 0xc0) != 0x80)
921 goto encoding_error;
922 if ((c & 0xf0) == 0xf0) {
923 if (cur[3] == 0)
924 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
925 if (((c & 0xf8) != 0xf0) ||
926 ((cur[3] & 0xc0) != 0x80))
927 goto encoding_error;
928 /* 4-byte code */
929 *len = 4;
930 val = (cur[0] & 0x7) << 18;
931 val |= (cur[1] & 0x3f) << 12;
932 val |= (cur[2] & 0x3f) << 6;
933 val |= cur[3] & 0x3f;
934 } else {
935 /* 3-byte code */
936 *len = 3;
937 val = (cur[0] & 0xf) << 12;
938 val |= (cur[1] & 0x3f) << 6;
939 val |= cur[2] & 0x3f;
940 }
941 } else {
942 /* 2-byte code */
943 *len = 2;
944 val = (cur[0] & 0x1f) << 6;
945 val |= cur[1] & 0x3f;
946 }
947 if (!IS_CHAR(val)) {
948 if ((ctxt->sax != NULL) &&
949 (ctxt->sax->error != NULL))
950 ctxt->sax->error(ctxt->userData,
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000951 "Char 0x%X out of allowed range\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +0000952 ctxt->errNo = XML_ERR_INVALID_ENCODING;
953 ctxt->wellFormed = 0;
954 ctxt->disableSAX = 1;
955 }
956 return(val);
957 } else {
958 /* 1-byte code */
959 *len = 1;
960 if (*ctxt->input->cur == 0xD) {
961 if (ctxt->input->cur[1] == 0xA) {
962 ctxt->nbChars++;
963 ctxt->input->cur++;
964 }
965 return(0xA);
966 }
967 return((int) *ctxt->input->cur);
968 }
969 }
970 /*
971 * Assume it's a fixed lenght encoding (1) with
972 * a compatibke encoding for the ASCII set, since
973 * XML constructs only use < 128 chars
974 */
975 *len = 1;
976 if (*ctxt->input->cur == 0xD) {
977 if (ctxt->input->cur[1] == 0xA) {
978 ctxt->nbChars++;
979 ctxt->input->cur++;
980 }
981 return(0xA);
982 }
983 return((int) *ctxt->input->cur);
984encoding_error:
985 /*
986 * If we detect an UTF8 error that probably mean that the
987 * input encoding didn't get properly advertized in the
988 * declaration header. Report the error and switch the encoding
989 * to ISO-Latin-1 (if you don't like this policy, just declare the
990 * encoding !)
991 */
Daniel Veillardbe803962000-06-28 23:40:59 +0000992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000993 ctxt->sax->error(ctxt->userData,
994 "Input is not proper UTF-8, indicate encoding !\n");
Daniel Veillardbe803962000-06-28 23:40:59 +0000995 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
996 ctxt->input->cur[0], ctxt->input->cur[1],
997 ctxt->input->cur[2], ctxt->input->cur[3]);
998 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000999 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1000
Daniel Veillardbe803962000-06-28 23:40:59 +00001001 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001002 *len = 1;
1003 return((int) *ctxt->input->cur);
1004}
1005
1006/**
1007 * xmlStringCurrentChar:
1008 * @ctxt: the XML parser context
1009 * @cur: pointer to the beginning of the char
1010 * @len: pointer to the length of the char read
1011 *
1012 * The current char value, if using UTF-8 this may actaully span multiple
1013 * bytes in the input buffer.
1014 *
1015 * Returns the current char value and its lenght
1016 */
1017
1018int
1019xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillardbe803962000-06-28 23:40:59 +00001020 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001021 /*
1022 * We are supposed to handle UTF8, check it's valid
1023 * From rfc2044: encoding of the Unicode values on UTF-8:
1024 *
1025 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1026 * 0000 0000-0000 007F 0xxxxxxx
1027 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1028 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1029 *
1030 * Check for the 0x110000 limit too
1031 */
1032 unsigned char c;
1033 unsigned int val;
1034
1035 c = *cur;
1036 if (c & 0x80) {
1037 if ((cur[1] & 0xc0) != 0x80)
1038 goto encoding_error;
1039 if ((c & 0xe0) == 0xe0) {
1040
1041 if ((cur[2] & 0xc0) != 0x80)
1042 goto encoding_error;
1043 if ((c & 0xf0) == 0xf0) {
1044 if (((c & 0xf8) != 0xf0) ||
1045 ((cur[3] & 0xc0) != 0x80))
1046 goto encoding_error;
1047 /* 4-byte code */
1048 *len = 4;
1049 val = (cur[0] & 0x7) << 18;
1050 val |= (cur[1] & 0x3f) << 12;
1051 val |= (cur[2] & 0x3f) << 6;
1052 val |= cur[3] & 0x3f;
1053 } else {
1054 /* 3-byte code */
1055 *len = 3;
1056 val = (cur[0] & 0xf) << 12;
1057 val |= (cur[1] & 0x3f) << 6;
1058 val |= cur[2] & 0x3f;
1059 }
1060 } else {
1061 /* 2-byte code */
1062 *len = 2;
1063 val = (cur[0] & 0x1f) << 6;
1064 val |= cur[2] & 0x3f;
1065 }
1066 if (!IS_CHAR(val)) {
1067 if ((ctxt->sax != NULL) &&
1068 (ctxt->sax->error != NULL))
1069 ctxt->sax->error(ctxt->userData,
Daniel Veillard496a1cf2000-05-03 14:20:55 +00001070 "Char 0x%X out of allowed range\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +00001071 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1072 ctxt->wellFormed = 0;
1073 ctxt->disableSAX = 1;
1074 }
1075 return(val);
1076 } else {
1077 /* 1-byte code */
1078 *len = 1;
1079 return((int) *cur);
1080 }
1081 }
1082 /*
1083 * Assume it's a fixed lenght encoding (1) with
1084 * a compatibke encoding for the ASCII set, since
1085 * XML constructs only use < 128 chars
1086 */
1087 *len = 1;
1088 return((int) *cur);
1089encoding_error:
1090 /*
1091 * If we detect an UTF8 error that probably mean that the
1092 * input encoding didn't get properly advertized in the
1093 * declaration header. Report the error and switch the encoding
1094 * to ISO-Latin-1 (if you don't like this policy, just declare the
1095 * encoding !)
1096 */
Daniel Veillardbe803962000-06-28 23:40:59 +00001097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001098 ctxt->sax->error(ctxt->userData,
1099 "Input is not proper UTF-8, indicate encoding !\n");
Daniel Veillardbe803962000-06-28 23:40:59 +00001100 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1101 ctxt->input->cur[0], ctxt->input->cur[1],
1102 ctxt->input->cur[2], ctxt->input->cur[3]);
1103 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001104 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1105
1106 *len = 1;
1107 return((int) *cur);
1108}
1109
1110/**
1111 * xmlCopyChar:
1112 * @len: pointer to the length of the char read (or zero)
1113 * @array: pointer to an arry of xmlChar
1114 * @val: the char value
1115 *
1116 * append the char value in the array
1117 *
1118 * Returns the number of xmlChar written
1119 */
1120
1121int
1122xmlCopyChar(int len, xmlChar *out, int val) {
1123 /*
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1126 *
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131 */
1132 if (len == 0) {
1133 if (val < 0) len = 0;
1134 else if (val < 0x80) len = 1;
1135 else if (val < 0x800) len = 2;
1136 else if (val < 0x10000) len = 3;
1137 else if (val < 0x110000) len = 4;
1138 if (len == 0) {
1139 fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
1140 val);
1141 return(0);
1142 }
1143 }
1144 if (len > 1) {
1145 int bits;
1146
1147 if (val < 0x80) { *out++= val; bits= -6; }
1148 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1149 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1150 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1151
1152 for ( ; bits >= 0; bits-= 6)
1153 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1154
1155 return(len);
1156 }
1157 *out = (xmlChar) val;
1158 return(1);
1159}
1160
1161/**
1162 * xmlSkipBlankChars:
1163 * @ctxt: the XML parser context
1164 *
1165 * skip all blanks character found at that point in the input streams.
1166 * It pops up finished entities in the process if allowable at that point.
1167 *
1168 * Returns the number of space chars skipped
1169 */
1170
1171int
1172xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1173 int cur, res = 0;
1174
1175 do {
1176 cur = CUR;
1177 while (IS_BLANK(cur)) {
1178 NEXT;
1179 cur = CUR;
1180 res++;
1181 }
1182 while ((cur == 0) && (ctxt->inputNr > 1) &&
1183 (ctxt->instate != XML_PARSER_COMMENT)) {
1184 xmlPopInput(ctxt);
1185 cur = CUR;
1186 }
1187 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1188 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1189 } while (IS_BLANK(cur));
1190 return(res);
1191}
Daniel Veillard260a68f1998-08-13 03:39:55 +00001192
Daniel Veillardb05deb71999-08-10 19:04:08 +00001193/************************************************************************
1194 * *
1195 * Commodity functions to handle entities processing *
1196 * *
1197 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +00001198
Daniel Veillard11e00581998-10-24 18:27:49 +00001199/**
1200 * xmlPopInput:
1201 * @ctxt: an XML parser context
1202 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001203 * xmlPopInput: the current input pointed by ctxt->input came to an end
1204 * pop it and return the next char.
1205 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001206 * Returns the current xmlChar in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00001207 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001208xmlChar
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001209xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001210 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardbc50b591999-03-01 12:28:53 +00001211 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001215 return(CUR);
1216}
1217
Daniel Veillard11e00581998-10-24 18:27:49 +00001218/**
1219 * xmlPushInput:
1220 * @ctxt: an XML parser context
1221 * @input: an XML parser input fragment (entity, XML fragment ...).
1222 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001223 * xmlPushInput: switch to a new input stream which is stacked on top
1224 * of the previous one(s).
1225 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001226void
1227xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001228 if (input == NULL) return;
1229 inputPush(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00001230 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001231}
1232
Daniel Veillard11e00581998-10-24 18:27:49 +00001233/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00001234 * xmlFreeInputStream:
Daniel Veillard51e3b151999-11-12 17:02:31 +00001235 * @input: an xmlParserInputPtr
Daniel Veillardd692aa41999-02-28 21:54:31 +00001236 *
1237 * Free up an input stream.
1238 */
1239void
1240xmlFreeInputStream(xmlParserInputPtr input) {
1241 if (input == NULL) return;
1242
Daniel Veillard6454aec1999-09-02 22:04:43 +00001243 if (input->filename != NULL) xmlFree((char *) input->filename);
1244 if (input->directory != NULL) xmlFree((char *) input->directory);
Daniel Veillardcf461992000-03-14 18:30:20 +00001245 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1246 if (input->version != NULL) xmlFree((char *) input->version);
Daniel Veillardd692aa41999-02-28 21:54:31 +00001247 if ((input->free != NULL) && (input->base != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001248 input->free((xmlChar *) input->base);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001249 if (input->buf != NULL)
1250 xmlFreeParserInputBuffer(input->buf);
Daniel Veillardd692aa41999-02-28 21:54:31 +00001251 memset(input, -1, sizeof(xmlParserInput));
Daniel Veillard6454aec1999-09-02 22:04:43 +00001252 xmlFree(input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00001253}
1254
1255/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00001256 * xmlNewInputStream:
1257 * @ctxt: an XML parser context
1258 *
1259 * Create a new input stream structure
1260 * Returns the new input stream or NULL
1261 */
1262xmlParserInputPtr
1263xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1264 xmlParserInputPtr input;
1265
Daniel Veillard6454aec1999-09-02 22:04:43 +00001266 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001267 if (input == NULL) {
Daniel Veillard5e873c42000-04-12 13:27:38 +00001268 if (ctxt != NULL) {
1269 ctxt->errNo = XML_ERR_NO_MEMORY;
1270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271 ctxt->sax->error(ctxt->userData,
1272 "malloc: couldn't allocate a new input stream\n");
1273 ctxt->errNo = XML_ERR_NO_MEMORY;
1274 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001275 return(NULL);
1276 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001277 memset(input, 0, sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001278 input->line = 1;
1279 input->col = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001280 input->standalone = -1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001281 return(input);
1282}
1283
1284/**
Daniel Veillard5e873c42000-04-12 13:27:38 +00001285 * xmlNewIOInputStream:
1286 * @ctxt: an XML parser context
1287 * @input: an I/O Input
1288 * @enc: the charset encoding if known
1289 *
1290 * Create a new input stream structure encapsulating the @input into
1291 * a stream suitable for the parser.
1292 *
1293 * Returns the new input stream or NULL
1294 */
1295xmlParserInputPtr
1296xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1297 xmlCharEncoding enc) {
1298 xmlParserInputPtr inputStream;
1299
1300 inputStream = xmlNewInputStream(ctxt);
1301 if (inputStream == NULL) {
1302 return(NULL);
1303 }
1304 inputStream->filename = NULL;
1305 inputStream->buf = input;
1306 inputStream->base = inputStream->buf->buffer->content;
1307 inputStream->cur = inputStream->buf->buffer->content;
1308 if (enc != XML_CHAR_ENCODING_NONE) {
1309 xmlSwitchEncoding(ctxt, enc);
1310 }
1311
1312 return(inputStream);
1313}
1314
1315/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001316 * xmlNewEntityInputStream:
1317 * @ctxt: an XML parser context
1318 * @entity: an Entity pointer
1319 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001320 * Create a new input stream based on an xmlEntityPtr
Daniel Veillardb96e6431999-08-29 21:02:19 +00001321 *
1322 * Returns the new input stream or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001323 */
Daniel Veillardccb09631998-10-27 06:21:04 +00001324xmlParserInputPtr
1325xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001326 xmlParserInputPtr input;
1327
1328 if (entity == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001329 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001331 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001332 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001333 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardccb09631998-10-27 06:21:04 +00001334 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001335 }
1336 if (entity->content == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001337 switch (entity->etype) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00001338 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001339 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341 ctxt->sax->error(ctxt->userData,
1342 "xmlNewEntityInputStream unparsed entity !\n");
1343 break;
1344 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1345 case XML_EXTERNAL_PARAMETER_ENTITY:
1346 return(xmlLoadExternalEntity((char *) entity->SystemID,
Daniel Veillard686d6b62000-01-03 11:08:02 +00001347 (char *) entity->ExternalID, ctxt));
Daniel Veillardb96e6431999-08-29 21:02:19 +00001348 case XML_INTERNAL_GENERAL_ENTITY:
1349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1350 ctxt->sax->error(ctxt->userData,
1351 "Internal entity %s without content !\n", entity->name);
1352 break;
1353 case XML_INTERNAL_PARAMETER_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001354 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356 ctxt->sax->error(ctxt->userData,
1357 "Internal parameter entity %s without content !\n", entity->name);
1358 break;
1359 case XML_INTERNAL_PREDEFINED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001360 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1362 ctxt->sax->error(ctxt->userData,
1363 "Predefined entity %s without content !\n", entity->name);
1364 break;
1365 }
Daniel Veillardccb09631998-10-27 06:21:04 +00001366 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001367 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001368 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001369 if (input == NULL) {
Daniel Veillardccb09631998-10-27 06:21:04 +00001370 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001371 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001372 input->filename = (char *) entity->SystemID;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001373 input->base = entity->content;
1374 input->cur = entity->content;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001375 input->length = entity->length;
Daniel Veillardccb09631998-10-27 06:21:04 +00001376 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001377}
1378
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001379/**
1380 * xmlNewStringInputStream:
1381 * @ctxt: an XML parser context
Daniel Veillardb05deb71999-08-10 19:04:08 +00001382 * @buffer: an memory buffer
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001383 *
1384 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +00001385 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001386 */
1387xmlParserInputPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001388xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001389 xmlParserInputPtr input;
1390
Daniel Veillardb05deb71999-08-10 19:04:08 +00001391 if (buffer == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001392 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001394 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001395 "internal: xmlNewStringInputStream string = NULL\n");
1396 return(NULL);
1397 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001398 input = xmlNewInputStream(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001399 if (input == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001400 return(NULL);
1401 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001402 input->base = buffer;
1403 input->cur = buffer;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001404 input->length = xmlStrlen(buffer);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001405 return(input);
1406}
1407
Daniel Veillard011b63c1999-06-02 17:44:04 +00001408/**
1409 * xmlNewInputFromFile:
1410 * @ctxt: an XML parser context
1411 * @filename: the filename to use as entity
1412 *
1413 * Create a new input stream based on a file.
1414 *
1415 * Returns the new input stream or NULL in case of error
1416 */
1417xmlParserInputPtr
1418xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001419 xmlParserInputBufferPtr buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001420 xmlParserInputPtr inputStream;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001421 char *directory = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001422
Daniel Veillardb05deb71999-08-10 19:04:08 +00001423 if (ctxt == NULL) return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001424 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001425 if (buf == NULL) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001426 char name[XML_PARSER_BIG_BUFFER_SIZE];
Daniel Veillard011b63c1999-06-02 17:44:04 +00001427
Daniel Veillardb05deb71999-08-10 19:04:08 +00001428 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1429#ifdef WIN32
1430 sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1431#else
1432 sprintf(name, "%s/%s", ctxt->input->directory, filename);
1433#endif
1434 buf = xmlParserInputBufferCreateFilename(name,
1435 XML_CHAR_ENCODING_NONE);
1436 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +00001437 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001438 }
1439 if ((buf == NULL) && (ctxt->directory != NULL)) {
1440#ifdef WIN32
1441 sprintf(name, "%s\\%s", ctxt->directory, filename);
1442#else
1443 sprintf(name, "%s/%s", ctxt->directory, filename);
1444#endif
1445 buf = xmlParserInputBufferCreateFilename(name,
1446 XML_CHAR_ENCODING_NONE);
1447 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +00001448 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001449 }
1450 if (buf == NULL)
1451 return(NULL);
1452 }
1453 if (directory == NULL)
1454 directory = xmlParserGetDirectory(filename);
1455
1456 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001457 if (inputStream == NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00001458 if (directory != NULL) xmlFree((char *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001459 return(NULL);
1460 }
1461
Daniel Veillard6454aec1999-09-02 22:04:43 +00001462 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001463 inputStream->directory = directory;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001464 inputStream->buf = buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001465
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001466 inputStream->base = inputStream->buf->buffer->content;
1467 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001468 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillard294cbca1999-12-03 13:19:09 +00001469 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001470 return(inputStream);
1471}
1472
1473/************************************************************************
1474 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00001475 * Commodity functions to handle parser contexts *
1476 * *
1477 ************************************************************************/
1478
1479/**
1480 * xmlInitParserCtxt:
1481 * @ctxt: an XML parser context
1482 *
1483 * Initialize a parser context
1484 */
1485
1486void
1487xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1488{
1489 xmlSAXHandler *sax;
1490
Daniel Veillardcf461992000-03-14 18:30:20 +00001491 xmlDefaultSAXHandlerInit();
1492
Daniel Veillard6454aec1999-09-02 22:04:43 +00001493 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001494 if (sax == NULL) {
1495 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1496 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001497 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001498
1499 /* Allocate the Input stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +00001500 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001501 ctxt->inputNr = 0;
1502 ctxt->inputMax = 5;
1503 ctxt->input = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001504
Daniel Veillardb05deb71999-08-10 19:04:08 +00001505 ctxt->version = NULL;
1506 ctxt->encoding = NULL;
1507 ctxt->standalone = -1;
1508 ctxt->hasExternalSubset = 0;
1509 ctxt->hasPErefs = 0;
1510 ctxt->html = 0;
1511 ctxt->external = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001512 ctxt->instate = XML_PARSER_START;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001513 ctxt->token = 0;
1514 ctxt->directory = NULL;
1515
1516 /* Allocate the Node stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +00001517 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001518 ctxt->nodeNr = 0;
1519 ctxt->nodeMax = 10;
1520 ctxt->node = NULL;
1521
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001522 /* Allocate the Name stack */
1523 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1524 ctxt->nameNr = 0;
1525 ctxt->nameMax = 10;
1526 ctxt->name = NULL;
1527
Daniel Veillardcf461992000-03-14 18:30:20 +00001528 /* Allocate the space stack */
1529 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1530 ctxt->spaceNr = 1;
1531 ctxt->spaceMax = 10;
1532 ctxt->spaceTab[0] = -1;
1533 ctxt->space = &ctxt->spaceTab[0];
1534
1535 if (sax == NULL) {
1536 ctxt->sax = &xmlDefaultSAXHandler;
1537 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001538 ctxt->sax = sax;
1539 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1540 }
1541 ctxt->userData = ctxt;
1542 ctxt->myDoc = NULL;
1543 ctxt->wellFormed = 1;
1544 ctxt->valid = 1;
1545 ctxt->validate = xmlDoValidityCheckingDefaultValue;
Daniel Veillard83a30e72000-03-02 03:33:32 +00001546 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001547 ctxt->vctxt.userData = ctxt;
Daniel Veillard5feb8492000-02-02 17:15:36 +00001548 if (ctxt->validate) {
1549 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillardcf461992000-03-14 18:30:20 +00001550 if (xmlGetWarningsDefaultValue == 0)
1551 ctxt->vctxt.warning = NULL;
1552 else
1553 ctxt->vctxt.warning = xmlParserValidityWarning;
1554 /* Allocate the Node stack */
1555 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1556 ctxt->vctxt.nodeNr = 0;
1557 ctxt->vctxt.nodeMax = 4;
1558 ctxt->vctxt.node = NULL;
Daniel Veillard5feb8492000-02-02 17:15:36 +00001559 } else {
1560 ctxt->vctxt.error = NULL;
1561 ctxt->vctxt.warning = NULL;
1562 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001563 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1564 ctxt->record_info = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001565 ctxt->nbChars = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001566 ctxt->checkIndex = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001567 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001568 ctxt->errNo = XML_ERR_OK;
Daniel Veillardcf461992000-03-14 18:30:20 +00001569 ctxt->depth = 0;
Daniel Veillardbe803962000-06-28 23:40:59 +00001570 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001571 xmlInitNodeInfoSeq(&ctxt->node_seq);
1572}
1573
1574/**
1575 * xmlFreeParserCtxt:
1576 * @ctxt: an XML parser context
1577 *
1578 * Free all the memory used by a parser context. However the parsed
1579 * document in ctxt->myDoc is not freed.
1580 */
1581
1582void
1583xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1584{
1585 xmlParserInputPtr input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001586 xmlChar *oldname;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001587
1588 if (ctxt == NULL) return;
1589
1590 while ((input = inputPop(ctxt)) != NULL) {
1591 xmlFreeInputStream(input);
1592 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001593 while ((oldname = namePop(ctxt)) != NULL) {
1594 xmlFree(oldname);
1595 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001596 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001597 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
Daniel Veillard6454aec1999-09-02 22:04:43 +00001598 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1599 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1600 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1601 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Daniel Veillardcf461992000-03-14 18:30:20 +00001602 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1603 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1604 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1605 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001606 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
Daniel Veillard6454aec1999-09-02 22:04:43 +00001607 xmlFree(ctxt->sax);
1608 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1609 xmlFree(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001610}
1611
1612/**
1613 * xmlNewParserCtxt:
1614 *
1615 * Allocate and initialize a new parser context.
1616 *
1617 * Returns the xmlParserCtxtPtr or NULL
1618 */
1619
1620xmlParserCtxtPtr
1621xmlNewParserCtxt()
1622{
1623 xmlParserCtxtPtr ctxt;
1624
Daniel Veillard6454aec1999-09-02 22:04:43 +00001625 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001626 if (ctxt == NULL) {
1627 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1628 perror("malloc");
1629 return(NULL);
1630 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001631 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001632 xmlInitParserCtxt(ctxt);
1633 return(ctxt);
1634}
1635
1636/**
1637 * xmlClearParserCtxt:
1638 * @ctxt: an XML parser context
1639 *
1640 * Clear (release owned resources) and reinitialize a parser context
1641 */
1642
1643void
1644xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1645{
1646 xmlClearNodeInfoSeq(&ctxt->node_seq);
1647 xmlInitParserCtxt(ctxt);
1648}
1649
1650/************************************************************************
1651 * *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001652 * Commodity functions to handle entities *
1653 * *
1654 ************************************************************************/
1655
Daniel Veillardcf461992000-03-14 18:30:20 +00001656/**
1657 * xmlCheckEntity:
1658 * @ctxt: an XML parser context
1659 * @content: the entity content string
1660 *
1661 * Parse an entity content and checks the WF constraints
1662 *
1663 */
1664
1665void
1666xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1667}
Daniel Veillardb05deb71999-08-10 19:04:08 +00001668
1669/**
1670 * xmlParseCharRef:
1671 * @ctxt: an XML parser context
1672 *
1673 * parse Reference declarations
1674 *
1675 * [66] CharRef ::= '&#' [0-9]+ ';' |
1676 * '&#x' [0-9a-fA-F]+ ';'
1677 *
1678 * [ WFC: Legal Character ]
1679 * Characters referred to using character references must match the
1680 * production for Char.
1681 *
Daniel Veillard10a2c651999-12-12 13:03:50 +00001682 * Returns the value parsed (as an int), 0 in case of error
Daniel Veillardb05deb71999-08-10 19:04:08 +00001683 */
1684int
1685xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1686 int val = 0;
1687
1688 if (ctxt->token != 0) {
1689 val = ctxt->token;
1690 ctxt->token = 0;
1691 return(val);
1692 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001693 if ((RAW == '&') && (NXT(1) == '#') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00001694 (NXT(2) == 'x')) {
1695 SKIP(3);
Daniel Veillardcf461992000-03-14 18:30:20 +00001696 while (RAW != ';') {
1697 if ((RAW >= '0') && (RAW <= '9'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001698 val = val * 16 + (CUR - '0');
Daniel Veillardcf461992000-03-14 18:30:20 +00001699 else if ((RAW >= 'a') && (RAW <= 'f'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001700 val = val * 16 + (CUR - 'a') + 10;
Daniel Veillardcf461992000-03-14 18:30:20 +00001701 else if ((RAW >= 'A') && (RAW <= 'F'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001702 val = val * 16 + (CUR - 'A') + 10;
1703 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001704 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706 ctxt->sax->error(ctxt->userData,
1707 "xmlParseCharRef: invalid hexadecimal value\n");
1708 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001709 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001710 val = 0;
1711 break;
1712 }
1713 NEXT;
1714 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001715 if (RAW == ';') {
1716 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1717 ctxt->nbChars ++;
1718 ctxt->input->cur++;
1719 }
1720 } else if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001721 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00001722 while (RAW != ';') {
1723 if ((RAW >= '0') && (RAW <= '9'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001724 val = val * 10 + (CUR - '0');
1725 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001726 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1728 ctxt->sax->error(ctxt->userData,
1729 "xmlParseCharRef: invalid decimal value\n");
1730 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001731 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001732 val = 0;
1733 break;
1734 }
1735 NEXT;
1736 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001737 if (RAW == ';') {
1738 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1739 ctxt->nbChars ++;
1740 ctxt->input->cur++;
1741 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001742 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001743 ctxt->errNo = XML_ERR_INVALID_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1745 ctxt->sax->error(ctxt->userData,
1746 "xmlParseCharRef: invalid value\n");
1747 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001748 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001749 }
1750
1751 /*
1752 * [ WFC: Legal Character ]
1753 * Characters referred to using character references must match the
1754 * production for Char.
1755 */
1756 if (IS_CHAR(val)) {
1757 return(val);
1758 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001759 ctxt->errNo = XML_ERR_INVALID_CHAR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001761 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
Daniel Veillardb05deb71999-08-10 19:04:08 +00001762 val);
1763 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001764 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001765 }
1766 return(0);
1767}
1768
1769/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00001770 * xmlParseStringCharRef:
1771 * @ctxt: an XML parser context
1772 * @str: a pointer to an index in the string
1773 *
1774 * parse Reference declarations, variant parsing from a string rather
1775 * than an an input flow.
1776 *
1777 * [66] CharRef ::= '&#' [0-9]+ ';' |
1778 * '&#x' [0-9a-fA-F]+ ';'
1779 *
1780 * [ WFC: Legal Character ]
1781 * Characters referred to using character references must match the
1782 * production for Char.
1783 *
1784 * Returns the value parsed (as an int), 0 in case of error, str will be
1785 * updated to the current value of the index
1786 */
1787int
1788xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1789 const xmlChar *ptr;
1790 xmlChar cur;
1791 int val = 0;
1792
1793 if ((str == NULL) || (*str == NULL)) return(0);
1794 ptr = *str;
1795 cur = *ptr;
Daniel Veillard0caf07a1999-12-21 16:25:49 +00001796 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001797 ptr += 3;
1798 cur = *ptr;
1799 while (cur != ';') {
1800 if ((cur >= '0') && (cur <= '9'))
1801 val = val * 16 + (cur - '0');
1802 else if ((cur >= 'a') && (cur <= 'f'))
1803 val = val * 16 + (cur - 'a') + 10;
1804 else if ((cur >= 'A') && (cur <= 'F'))
1805 val = val * 16 + (cur - 'A') + 10;
1806 else {
1807 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1809 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +00001810 "xmlParseStringCharRef: invalid hexadecimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00001811 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001812 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001813 val = 0;
1814 break;
1815 }
1816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
Daniel Veillard0142b842000-01-14 14:45:24 +00001821 } else if ((cur == '&') && (ptr[1] == '#')){
Daniel Veillard10a2c651999-12-12 13:03:50 +00001822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') {
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
1828 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1830 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +00001831 "xmlParseStringCharRef: invalid decimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00001832 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001833 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001834 val = 0;
1835 break;
1836 }
1837 ptr++;
1838 cur = *ptr;
1839 }
1840 if (cur == ';')
1841 ptr++;
1842 } else {
1843 ctxt->errNo = XML_ERR_INVALID_CHARREF;
1844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1845 ctxt->sax->error(ctxt->userData,
1846 "xmlParseCharRef: invalid value\n");
1847 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001848 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001849 return(0);
1850 }
1851 *str = ptr;
1852
1853 /*
1854 * [ WFC: Legal Character ]
1855 * Characters referred to using character references must match the
1856 * production for Char.
1857 */
1858 if (IS_CHAR(val)) {
1859 return(val);
1860 } else {
1861 ctxt->errNo = XML_ERR_INVALID_CHAR;
1862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1863 ctxt->sax->error(ctxt->userData,
1864 "CharRef: invalid xmlChar value %d\n", val);
1865 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001866 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001867 }
1868 return(0);
1869}
1870
1871/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00001872 * xmlParserHandleReference:
1873 * @ctxt: the parser context
1874 *
1875 * [67] Reference ::= EntityRef | CharRef
1876 *
1877 * [68] EntityRef ::= '&' Name ';'
1878 *
1879 * [ WFC: Entity Declared ]
1880 * the Name given in the entity reference must match that in an entity
1881 * declaration, except that well-formed documents need not declare any
1882 * of the following entities: amp, lt, gt, apos, quot.
1883 *
1884 * [ WFC: Parsed Entity ]
1885 * An entity reference must not contain the name of an unparsed entity
1886 *
1887 * [66] CharRef ::= '&#' [0-9]+ ';' |
1888 * '&#x' [0-9a-fA-F]+ ';'
1889 *
1890 * A PEReference may have been detectect in the current input stream
1891 * the handling is done accordingly to
1892 * http://www.w3.org/TR/REC-xml#entproc
1893 */
1894void
1895xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1896 xmlParserInputPtr input;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001897 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001898 xmlEntityPtr ent = NULL;
1899
Daniel Veillard35008381999-10-25 13:15:52 +00001900 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00001901 return;
1902 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001903 if (RAW != '&') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001904 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001905 if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001906 switch(ctxt->instate) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001907 case XML_PARSER_ENTITY_DECL:
1908 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001909 case XML_PARSER_CDATA_SECTION:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001910 case XML_PARSER_COMMENT:
Daniel Veillardcf461992000-03-14 18:30:20 +00001911 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001912 /* we just ignore it there */
1913 return;
1914 case XML_PARSER_START_TAG:
1915 return;
1916 case XML_PARSER_END_TAG:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001917 return;
1918 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001919 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1921 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1922 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001923 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001924 return;
1925 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001926 case XML_PARSER_START:
1927 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001928 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1930 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1931 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001932 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001933 return;
1934 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001935 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1937 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1938 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001939 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001940 return;
1941 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001942 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1944 ctxt->sax->error(ctxt->userData,
1945 "CharRef are forbiden in DTDs!\n");
1946 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001947 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001948 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001949 case XML_PARSER_ENTITY_VALUE:
1950 /*
1951 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001952 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001953 * entity value to be able to save the internal
1954 * subset of the document.
1955 * This will be handled by xmlDecodeEntities
1956 */
1957 return;
1958 case XML_PARSER_CONTENT:
1959 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001960 ctxt->token = xmlParseCharRef(ctxt);
1961 return;
1962 }
1963 return;
1964 }
1965
1966 switch(ctxt->instate) {
1967 case XML_PARSER_CDATA_SECTION:
1968 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001969 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001970 case XML_PARSER_COMMENT:
Daniel Veillardcf461992000-03-14 18:30:20 +00001971 case XML_PARSER_SYSTEM_LITERAL:
1972 case XML_PARSER_CONTENT:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001973 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001974 case XML_PARSER_START_TAG:
1975 return;
1976 case XML_PARSER_END_TAG:
1977 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001978 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001979 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1982 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001983 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001984 return;
1985 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001986 case XML_PARSER_START:
1987 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001988 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1990 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1991 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001992 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001993 return;
1994 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001995 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1998 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001999 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002000 return;
2001 case XML_PARSER_ENTITY_VALUE:
2002 /*
2003 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00002004 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00002005 * entity value to be able to save the internal
2006 * subset of the document.
2007 * This will be handled by xmlDecodeEntities
2008 */
2009 return;
2010 case XML_PARSER_ATTRIBUTE_VALUE:
2011 /*
2012 * NOTE: in the case of attributes values, we don't do the
2013 * substitution here unless we are in a mode where
2014 * the parser is explicitely asked to substitute
2015 * entities. The SAX callback is called with values
2016 * without entity substitution.
2017 * This will then be handled by xmlDecodeEntities
2018 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00002019 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002020 case XML_PARSER_ENTITY_DECL:
2021 /*
2022 * we just ignore it there
2023 * the substitution will be done once the entity is referenced
2024 */
2025 return;
2026 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002027 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029 ctxt->sax->error(ctxt->userData,
2030 "Entity references are forbiden in DTDs!\n");
2031 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002032 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002033 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002034 }
2035
2036 NEXT;
2037 name = xmlScanName(ctxt);
2038 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002039 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2041 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
2042 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002043 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002044 ctxt->token = '&';
2045 return;
2046 }
2047 if (NXT(xmlStrlen(name)) != ';') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002048 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2050 ctxt->sax->error(ctxt->userData,
2051 "Entity reference: ';' expected\n");
2052 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002053 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002054 ctxt->token = '&';
Daniel Veillard6454aec1999-09-02 22:04:43 +00002055 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002056 return;
2057 }
2058 SKIP(xmlStrlen(name) + 1);
2059 if (ctxt->sax != NULL) {
2060 if (ctxt->sax->getEntity != NULL)
2061 ent = ctxt->sax->getEntity(ctxt->userData, name);
2062 }
2063
2064 /*
2065 * [ WFC: Entity Declared ]
2066 * the Name given in the entity reference must match that in an entity
2067 * declaration, except that well-formed documents need not declare any
2068 * of the following entities: amp, lt, gt, apos, quot.
2069 */
2070 if (ent == NULL)
2071 ent = xmlGetPredefinedEntity(name);
2072 if (ent == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002073 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075 ctxt->sax->error(ctxt->userData,
2076 "Entity reference: entity %s not declared\n",
2077 name);
2078 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002079 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00002080 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002081 return;
2082 }
2083
2084 /*
2085 * [ WFC: Parsed Entity ]
2086 * An entity reference must not contain the name of an unparsed entity
2087 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002088 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002089 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2091 ctxt->sax->error(ctxt->userData,
2092 "Entity reference to unparsed entity %s\n", name);
2093 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002094 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002095 }
2096
Daniel Veillardcf461992000-03-14 18:30:20 +00002097 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002098 ctxt->token = ent->content[0];
Daniel Veillard6454aec1999-09-02 22:04:43 +00002099 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002100 return;
2101 }
2102 input = xmlNewEntityInputStream(ctxt, ent);
2103 xmlPushInput(ctxt, input);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002104 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002105 return;
2106}
2107
2108/**
2109 * xmlParserHandlePEReference:
2110 * @ctxt: the parser context
2111 *
2112 * [69] PEReference ::= '%' Name ';'
2113 *
2114 * [ WFC: No Recursion ]
2115 * TODO A parsed entity must not contain a recursive
2116 * reference to itself, either directly or indirectly.
2117 *
2118 * [ WFC: Entity Declared ]
2119 * In a document without any DTD, a document with only an internal DTD
2120 * subset which contains no parameter entity references, or a document
2121 * with "standalone='yes'", ... ... The declaration of a parameter
2122 * entity must precede any reference to it...
2123 *
2124 * [ VC: Entity Declared ]
2125 * In a document with an external subset or external parameter entities
2126 * with "standalone='no'", ... ... The declaration of a parameter entity
2127 * must precede any reference to it...
2128 *
2129 * [ WFC: In DTD ]
2130 * Parameter-entity references may only appear in the DTD.
2131 * NOTE: misleading but this is handled.
2132 *
2133 * A PEReference may have been detected in the current input stream
2134 * the handling is done accordingly to
2135 * http://www.w3.org/TR/REC-xml#entproc
2136 * i.e.
2137 * - Included in literal in entity values
2138 * - Included as Paraemeter Entity reference within DTDs
2139 */
2140void
2141xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002142 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002143 xmlEntityPtr entity = NULL;
2144 xmlParserInputPtr input;
2145
Daniel Veillard35008381999-10-25 13:15:52 +00002146 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00002147 return;
2148 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002149 if (RAW != '%') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002150 switch(ctxt->instate) {
2151 case XML_PARSER_CDATA_SECTION:
2152 return;
2153 case XML_PARSER_COMMENT:
2154 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002155 case XML_PARSER_START_TAG:
2156 return;
2157 case XML_PARSER_END_TAG:
2158 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002159 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002160 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2162 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
2163 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002164 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002165 return;
2166 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002167 case XML_PARSER_START:
2168 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002169 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2171 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
2172 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002173 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002174 return;
2175 case XML_PARSER_ENTITY_DECL:
2176 case XML_PARSER_CONTENT:
2177 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002178 case XML_PARSER_PI:
Daniel Veillardcf461992000-03-14 18:30:20 +00002179 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardb05deb71999-08-10 19:04:08 +00002180 /* we just ignore it there */
2181 return;
2182 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002183 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2185 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
2186 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002187 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002188 return;
2189 case XML_PARSER_ENTITY_VALUE:
2190 /*
2191 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00002192 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00002193 * entity value to be able to save the internal
2194 * subset of the document.
2195 * This will be handled by xmlDecodeEntities
2196 */
2197 return;
2198 case XML_PARSER_DTD:
2199 /*
2200 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2201 * In the internal DTD subset, parameter-entity references
2202 * can occur only where markup declarations can occur, not
2203 * within markup declarations.
2204 * In that case this is handled in xmlParseMarkupDecl
2205 */
2206 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2207 return;
2208 }
2209
2210 NEXT;
2211 name = xmlParseName(ctxt);
2212 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002213 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
2216 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002217 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002218 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002219 if (RAW == ';') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002220 NEXT;
2221 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2222 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2223 if (entity == NULL) {
2224
2225 /*
2226 * [ WFC: Entity Declared ]
2227 * In a document without any DTD, a document with only an
2228 * internal DTD subset which contains no parameter entity
2229 * references, or a document with "standalone='yes'", ...
2230 * ... The declaration of a parameter entity must precede
2231 * any reference to it...
2232 */
2233 if ((ctxt->standalone == 1) ||
2234 ((ctxt->hasExternalSubset == 0) &&
2235 (ctxt->hasPErefs == 0))) {
2236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2237 ctxt->sax->error(ctxt->userData,
2238 "PEReference: %%%s; not found\n", name);
2239 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002240 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002241 } else {
2242 /*
2243 * [ VC: Entity Declared ]
2244 * In a document with an external subset or external
2245 * parameter entities with "standalone='no'", ...
2246 * ... The declaration of a parameter entity must precede
2247 * any reference to it...
2248 */
Daniel Veillard87b95392000-08-12 21:12:04 +00002249 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2250 ctxt->vctxt.error(ctxt->vctxt.userData,
2251 "PEReference: %%%s; not found\n", name);
2252 } else
Daniel Veillardb05deb71999-08-10 19:04:08 +00002253 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2254 ctxt->sax->warning(ctxt->userData,
2255 "PEReference: %%%s; not found\n", name);
2256 ctxt->valid = 0;
2257 }
2258 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002259 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2260 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002261 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00002262 * TODO !!! handle the extra spaces added before and after
Daniel Veillardb05deb71999-08-10 19:04:08 +00002263 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardb05deb71999-08-10 19:04:08 +00002264 */
2265 input = xmlNewEntityInputStream(ctxt, entity);
2266 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00002267 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2268 (RAW == '<') && (NXT(1) == '?') &&
2269 (NXT(2) == 'x') && (NXT(3) == 'm') &&
2270 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
2271 xmlParseTextDecl(ctxt);
2272 }
2273 if (ctxt->token == 0)
2274 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00002275 } else {
2276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277 ctxt->sax->error(ctxt->userData,
2278 "xmlHandlePEReference: %s is not a parameter entity\n",
2279 name);
2280 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002281 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002282 }
2283 }
2284 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002285 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2287 ctxt->sax->error(ctxt->userData,
2288 "xmlHandlePEReference: expecting ';'\n");
2289 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002290 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002291 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00002292 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002293 }
2294}
2295
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer to that many xmlChar.
 * Two variables must be in scope at the point of use: the xmlChar pointer
 * named by the argument and an integer named <argument>_size holding its
 * capacity.
 *
 * On allocation failure the previous allocation is released (the result of
 * xmlRealloc is held in a temporary so the old pointer is not lost) and the
 * ENCLOSING FUNCTION returns NULL; the macro may therefore only be used in
 * functions returning a pointer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_tmp;                                              \
    buffer##_size *= 2;                                                 \
    buffer##_tmp = (xmlChar *)                                          \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (buffer##_tmp == NULL) {                                         \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_tmp;                                              \
}
2308
Daniel Veillard011b63c1999-06-02 17:44:04 +00002309/**
2310 * xmlDecodeEntities:
2311 * @ctxt: the parser context
2312 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2313 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002314 * @end: an end marker xmlChar, 0 if none
2315 * @end2: an end marker xmlChar, 0 if none
2316 * @end3: an end marker xmlChar, 0 if none
Daniel Veillard011b63c1999-06-02 17:44:04 +00002317 *
2318 * [67] Reference ::= EntityRef | CharRef
2319 *
2320 * [69] PEReference ::= '%' Name ';'
2321 *
2322 * Returns A newly allocated string with the substitution done. The caller
2323 * must deallocate it !
2324 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002325xmlChar *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002326xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002327 xmlChar end, xmlChar end2, xmlChar end3) {
2328 xmlChar *buffer = NULL;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00002329 unsigned int buffer_size = 0;
2330 unsigned int nbchars = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002331
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002332 xmlChar *current = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002333 xmlEntityPtr ent;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002334 unsigned int max = (unsigned int) len;
Daniel Veillardcf461992000-03-14 18:30:20 +00002335 int c,l;
2336
2337 if (ctxt->depth > 40) {
2338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2339 ctxt->sax->error(ctxt->userData,
2340 "Detected entity reference loop\n");
2341 ctxt->wellFormed = 0;
2342 ctxt->disableSAX = 1;
2343 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2344 return(NULL);
2345 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002346
2347 /*
2348 * allocate a translation buffer.
2349 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002350 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002351 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
Daniel Veillard011b63c1999-06-02 17:44:04 +00002352 if (buffer == NULL) {
2353 perror("xmlDecodeEntities: malloc failed");
2354 return(NULL);
2355 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002356
2357 /*
2358 * Ok loop until we reach one of the ending char or a size limit.
2359 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002360 c = CUR_CHAR(l);
2361 while ((nbchars < max) && (c != end) &&
2362 (c != end2) && (c != end3)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002363
Daniel Veillardcf461992000-03-14 18:30:20 +00002364 if (c == 0) break;
2365 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002366 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00002367 COPY_BUF(0,buffer,nbchars,val);
2368 NEXTL(l);
2369 } else if ((c == '&') && (ctxt->token != '&') &&
2370 (what & XML_SUBSTITUTE_REF)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002371 ent = xmlParseEntityRef(ctxt);
2372 if ((ent != NULL) &&
2373 (ctxt->replaceEntities != 0)) {
2374 current = ent->content;
2375 while (*current != 0) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002376 buffer[nbchars++] = *current++;
2377 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002378 growBuffer(buffer);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002379 }
2380 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00002381 } else if (ent != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002382 const xmlChar *cur = ent->name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002383
Daniel Veillardcf461992000-03-14 18:30:20 +00002384 buffer[nbchars++] = '&';
2385 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002386 growBuffer(buffer);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002387 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002388 while (*cur != 0) {
2389 buffer[nbchars++] = *cur++;
2390 }
2391 buffer[nbchars++] = ';';
Daniel Veillard011b63c1999-06-02 17:44:04 +00002392 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002393 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002394 /*
2395 * a PEReference induce to switch the entity flow,
2396 * we break here to flush the current set of chars
2397 * parsed if any. We will be called back later.
2398 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002399 if (nbchars != 0) break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002400
2401 xmlParsePEReference(ctxt);
2402
2403 /*
2404 * Pop-up of finished entities.
2405 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002406 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002407 xmlPopInput(ctxt);
2408
Daniel Veillardb05deb71999-08-10 19:04:08 +00002409 break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002410 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002411 COPY_BUF(l,buffer,nbchars,c);
2412 NEXTL(l);
2413 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Raph Levien05240da1999-06-15 21:27:11 +00002414 growBuffer(buffer);
Raph Levien05240da1999-06-15 21:27:11 +00002415 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002416 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002417 c = CUR_CHAR(l);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002418 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002419 buffer[nbchars++] = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002420 return(buffer);
2421}
2422
Daniel Veillard10a2c651999-12-12 13:03:50 +00002423/**
2424 * xmlStringDecodeEntities:
2425 * @ctxt: the parser context
2426 * @str: the input string
2427 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2428 * @end: an end marker xmlChar, 0 if none
2429 * @end2: an end marker xmlChar, 0 if none
2430 * @end3: an end marker xmlChar, 0 if none
2431 *
2432 * [67] Reference ::= EntityRef | CharRef
2433 *
2434 * [69] PEReference ::= '%' Name ';'
2435 *
2436 * Returns A newly allocated string with the substitution done. The caller
2437 * must deallocate it !
2438 */
2439xmlChar *
2440xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2441 xmlChar end, xmlChar end2, xmlChar end3) {
2442 xmlChar *buffer = NULL;
2443 int buffer_size = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002444
2445 xmlChar *current = NULL;
2446 xmlEntityPtr ent;
Daniel Veillardcf461992000-03-14 18:30:20 +00002447 int c,l;
2448 int nbchars = 0;
2449
Daniel Veillard87b95392000-08-12 21:12:04 +00002450 if (str == NULL)
2451 return(NULL);
2452
Daniel Veillardcf461992000-03-14 18:30:20 +00002453 if (ctxt->depth > 40) {
2454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2455 ctxt->sax->error(ctxt->userData,
2456 "Detected entity reference loop\n");
2457 ctxt->wellFormed = 0;
2458 ctxt->disableSAX = 1;
2459 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2460 return(NULL);
2461 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002462
2463 /*
2464 * allocate a translation buffer.
2465 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002466 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002467 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2468 if (buffer == NULL) {
2469 perror("xmlDecodeEntities: malloc failed");
2470 return(NULL);
2471 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002472
2473 /*
2474 * Ok loop until we reach one of the ending char or a size limit.
2475 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002476 c = CUR_SCHAR(str, l);
2477 while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002478
Daniel Veillardcf461992000-03-14 18:30:20 +00002479 if (c == 0) break;
2480 if ((c == '&') && (str[1] == '#')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002481 int val = xmlParseStringCharRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +00002482 if (val != 0) {
2483 COPY_BUF(0,buffer,nbchars,val);
2484 }
2485 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002486 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +00002487 if ((ent != NULL) && (ent->content != NULL)) {
2488 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002489
Daniel Veillardcf461992000-03-14 18:30:20 +00002490 ctxt->depth++;
2491 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2492 0, 0, 0);
2493 ctxt->depth--;
2494 if (rep != NULL) {
2495 current = rep;
2496 while (*current != 0) {
2497 buffer[nbchars++] = *current++;
2498 if (nbchars >
2499 buffer_size - XML_PARSER_BUFFER_SIZE) {
2500 growBuffer(buffer);
2501 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002502 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002503 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002504 }
2505 } else if (ent != NULL) {
2506 int i = xmlStrlen(ent->name);
2507 const xmlChar *cur = ent->name;
2508
Daniel Veillardcf461992000-03-14 18:30:20 +00002509 buffer[nbchars++] = '&';
2510 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002511 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002512 }
2513 for (;i > 0;i--)
Daniel Veillardcf461992000-03-14 18:30:20 +00002514 buffer[nbchars++] = *cur++;
2515 buffer[nbchars++] = ';';
Daniel Veillard10a2c651999-12-12 13:03:50 +00002516 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002517 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002518 ent = xmlParseStringPEReference(ctxt, &str);
2519 if (ent != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002520 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002521
Daniel Veillardcf461992000-03-14 18:30:20 +00002522 ctxt->depth++;
2523 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2524 0, 0, 0);
2525 ctxt->depth--;
2526 if (rep != NULL) {
2527 current = rep;
2528 while (*current != 0) {
2529 buffer[nbchars++] = *current++;
2530 if (nbchars >
2531 buffer_size - XML_PARSER_BUFFER_SIZE) {
2532 growBuffer(buffer);
2533 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002534 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002535 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002536 }
2537 }
2538 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002539 COPY_BUF(l,buffer,nbchars,c);
2540 str += l;
2541 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002542 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002543 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002544 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002545 c = CUR_SCHAR(str, l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002546 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002547 buffer[nbchars++] = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002548 return(buffer);
2549}
2550
Daniel Veillard260a68f1998-08-13 03:39:55 +00002551
2552/************************************************************************
2553 * *
Daniel Veillard27d88741999-05-29 11:51:49 +00002554 * Commodity functions to handle encodings *
2555 * *
2556 ************************************************************************/
2557
Daniel Veillardcf461992000-03-14 18:30:20 +00002558/*
2559 * xmlCheckLanguageID
2560 * @lang: pointer to the string value
2561 *
2562 * Checks that the value conforms to the LanguageID production:
2563 *
2564 * [33] LanguageID ::= Langcode ('-' Subcode)*
2565 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2566 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2567 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2568 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2569 * [38] Subcode ::= ([a-z] | [A-Z])+
2570 *
2571 * Returns 1 if correct 0 otherwise
2572 **/
2573int
2574xmlCheckLanguageID(const xmlChar *lang) {
2575 const xmlChar *cur = lang;
2576
2577 if (cur == NULL)
2578 return(0);
2579 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2580 ((cur[0] == 'I') && (cur[1] == '-'))) {
2581 /*
2582 * IANA code
2583 */
2584 cur += 2;
2585 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2586 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2587 cur++;
2588 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2589 ((cur[0] == 'X') && (cur[1] == '-'))) {
2590 /*
2591 * User code
2592 */
2593 cur += 2;
2594 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2595 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2596 cur++;
2597 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2598 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2599 /*
2600 * ISO639
2601 */
2602 cur++;
2603 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2604 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2605 cur++;
2606 else
2607 return(0);
2608 } else
2609 return(0);
2610 while (cur[0] != 0) {
2611 if (cur[0] != '-')
2612 return(0);
2613 cur++;
2614 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2615 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2616 cur++;
2617 else
2618 return(0);
2619 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2620 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2621 cur++;
2622 }
2623 return(1);
2624}
2625
Daniel Veillard27d88741999-05-29 11:51:49 +00002626/**
2627 * xmlSwitchEncoding:
2628 * @ctxt: the parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00002629 * @enc: the encoding value (number)
Daniel Veillard27d88741999-05-29 11:51:49 +00002630 *
2631 * change the input functions when discovering the character encoding
2632 * of a given entity.
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002633 *
2634 * Returns 0 in case of success, -1 otherwise
Daniel Veillard27d88741999-05-29 11:51:49 +00002635 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002636int
Daniel Veillard27d88741999-05-29 11:51:49 +00002637xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2638{
Daniel Veillardcf461992000-03-14 18:30:20 +00002639 xmlCharEncodingHandlerPtr handler;
2640
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002641 switch (enc) {
2642 case XML_CHAR_ENCODING_ERROR:
2643 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2645 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2646 ctxt->wellFormed = 0;
2647 ctxt->disableSAX = 1;
2648 break;
2649 case XML_CHAR_ENCODING_NONE:
2650 /* let's assume it's UTF-8 without the XML decl */
Daniel Veillardbe803962000-06-28 23:40:59 +00002651 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002652 return(0);
2653 case XML_CHAR_ENCODING_UTF8:
2654 /* default encoding, no conversion should be needed */
Daniel Veillardbe803962000-06-28 23:40:59 +00002655 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002656 return(0);
2657 default:
2658 break;
2659 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002660 handler = xmlGetCharEncodingHandler(enc);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002661 if (handler == NULL) {
2662 /*
2663 * Default handlers.
2664 */
2665 switch (enc) {
2666 case XML_CHAR_ENCODING_ERROR:
2667 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
Daniel Veillardbe803962000-06-28 23:40:59 +00002672 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002673 break;
2674 case XML_CHAR_ENCODING_NONE:
2675 /* let's assume it's UTF-8 without the XML decl */
Daniel Veillardbe803962000-06-28 23:40:59 +00002676 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002677 return(0);
2678 case XML_CHAR_ENCODING_UTF8:
Daniel Veillard87b95392000-08-12 21:12:04 +00002679 case XML_CHAR_ENCODING_ASCII:
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002680 /* default encoding, no conversion should be needed */
Daniel Veillardbe803962000-06-28 23:40:59 +00002681 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002682 return(0);
2683 case XML_CHAR_ENCODING_UTF16LE:
2684 break;
2685 case XML_CHAR_ENCODING_UTF16BE:
2686 break;
2687 case XML_CHAR_ENCODING_UCS4LE:
2688 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2690 ctxt->sax->error(ctxt->userData,
2691 "char encoding USC4 little endian not supported\n");
2692 break;
2693 case XML_CHAR_ENCODING_UCS4BE:
2694 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2696 ctxt->sax->error(ctxt->userData,
2697 "char encoding USC4 big endian not supported\n");
2698 break;
2699 case XML_CHAR_ENCODING_EBCDIC:
2700 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2702 ctxt->sax->error(ctxt->userData,
2703 "char encoding EBCDIC not supported\n");
2704 break;
2705 case XML_CHAR_ENCODING_UCS4_2143:
2706 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2708 ctxt->sax->error(ctxt->userData,
2709 "char encoding UCS4 2143 not supported\n");
2710 break;
2711 case XML_CHAR_ENCODING_UCS4_3412:
2712 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2714 ctxt->sax->error(ctxt->userData,
2715 "char encoding UCS4 3412 not supported\n");
2716 break;
2717 case XML_CHAR_ENCODING_UCS2:
2718 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2720 ctxt->sax->error(ctxt->userData,
2721 "char encoding UCS2 not supported\n");
2722 break;
2723 case XML_CHAR_ENCODING_8859_1:
2724 case XML_CHAR_ENCODING_8859_2:
2725 case XML_CHAR_ENCODING_8859_3:
2726 case XML_CHAR_ENCODING_8859_4:
2727 case XML_CHAR_ENCODING_8859_5:
2728 case XML_CHAR_ENCODING_8859_6:
2729 case XML_CHAR_ENCODING_8859_7:
2730 case XML_CHAR_ENCODING_8859_8:
2731 case XML_CHAR_ENCODING_8859_9:
2732 /*
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002733 * We used to keep the internal content in the
2734 * document encoding however this turns being unmaintainable
2735 * So xmlGetCharEncodingHandler() will return non-null
2736 * values for this now.
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002737 */
2738 if ((ctxt->inputNr == 1) &&
2739 (ctxt->encoding == NULL) &&
2740 (ctxt->input->encoding != NULL)) {
2741 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2742 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002743 ctxt->charset = enc;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002744 return(0);
2745 case XML_CHAR_ENCODING_2022_JP:
2746 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2748 ctxt->sax->error(ctxt->userData,
2749 "char encoding ISO-2022-JPnot supported\n");
2750 break;
2751 case XML_CHAR_ENCODING_SHIFT_JIS:
2752 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "char encoding Shift_JIS not supported\n");
2756 break;
2757 case XML_CHAR_ENCODING_EUC_JP:
2758 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2760 ctxt->sax->error(ctxt->userData,
2761 "char encoding EUC-JPnot supported\n");
2762 break;
2763 }
2764 }
2765 if (handler == NULL)
2766 return(-1);
Daniel Veillardbe803962000-06-28 23:40:59 +00002767 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002768 return(xmlSwitchToEncoding(ctxt, handler));
2769}
2770
2771/**
2772 * xmlSwitchToEncoding:
2773 * @ctxt: the parser context
2774 * @handler: the encoding handler
2775 *
2776 * change the input functions when discovering the character encoding
2777 * of a given entity.
2778 *
2779 * Returns 0 in case of success, -1 otherwise
2780 */
2781int
2782xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2783{
2784 int nbchars;
2785
Daniel Veillardcf461992000-03-14 18:30:20 +00002786 if (handler != NULL) {
2787 if (ctxt->input != NULL) {
2788 if (ctxt->input->buf != NULL) {
2789 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002790 if (ctxt->input->buf->encoder == handler)
2791 return(0);
Daniel Veillardbe803962000-06-28 23:40:59 +00002792 /*
2793 * Note: this is a bit dangerous, but that's what it
2794 * takes to use nearly compatible signature for different
2795 * encodings.
2796 */
2797 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2798 ctxt->input->buf->encoder = handler;
2799 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002800 }
2801 ctxt->input->buf->encoder = handler;
2802
2803 /*
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002804 * Is there already some content down the pipe to convert ?
Daniel Veillardcf461992000-03-14 18:30:20 +00002805 */
2806 if ((ctxt->input->buf->buffer != NULL) &&
2807 (ctxt->input->buf->buffer->use > 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002808 int processed;
2809
2810 /*
2811 * Specific handling of the Byte Order Mark for
2812 * UTF-16
2813 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002814 if ((handler->name != NULL) &&
2815 (!strcmp(handler->name, "UTF-16LE")) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00002816 (ctxt->input->cur[0] == 0xFF) &&
2817 (ctxt->input->cur[1] == 0xFE)) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002818 ctxt->input->cur += 2;
Daniel Veillardcf461992000-03-14 18:30:20 +00002819 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002820 if ((handler->name != NULL) &&
2821 (!strcmp(handler->name, "UTF-16BE")) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00002822 (ctxt->input->cur[0] == 0xFE) &&
2823 (ctxt->input->cur[1] == 0xFF)) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002824 ctxt->input->cur += 2;
Daniel Veillardcf461992000-03-14 18:30:20 +00002825 }
2826
2827 /*
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002828 * Shring the current input buffer.
2829 * Move it as the raw buffer and create a new input buffer
Daniel Veillardcf461992000-03-14 18:30:20 +00002830 */
2831 processed = ctxt->input->cur - ctxt->input->base;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002832 xmlBufferShrink(ctxt->input->buf->buffer, processed);
2833 ctxt->input->buf->raw = ctxt->input->buf->buffer;
2834 ctxt->input->buf->buffer = xmlBufferCreate();
Daniel Veillardcf461992000-03-14 18:30:20 +00002835
2836 /*
Daniel Veillardbe803962000-06-28 23:40:59 +00002837 * convert just enough to get
2838 * '<?xml version="1.0" encoding="xxx"?>'
2839 * parsed with the autodetected encoding
2840 * into the parser reading buffer.
Daniel Veillardcf461992000-03-14 18:30:20 +00002841 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002842 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2843 ctxt->input->buf->buffer,
2844 ctxt->input->buf->raw);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002845 if (nbchars < 0) {
2846 fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2847 return(-1);
2848 }
2849 ctxt->input->base =
2850 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard87b95392000-08-12 21:12:04 +00002851
Daniel Veillardcf461992000-03-14 18:30:20 +00002852 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002853 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002854 } else {
Daniel Veillard46e370e2000-07-21 20:32:03 +00002855 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002856 /*
2857 * When parsing a static memory array one must know the
2858 * size to be able to convert the buffer.
2859 */
2860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2861 ctxt->sax->error(ctxt->userData,
2862 "xmlSwitchEncoding : no input\n");
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002863 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00002864 } else {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002865 int processed;
Daniel Veillardcf461992000-03-14 18:30:20 +00002866
2867 /*
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002868 * Shring the current input buffer.
2869 * Move it as the raw buffer and create a new input buffer
Daniel Veillardcf461992000-03-14 18:30:20 +00002870 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002871 processed = ctxt->input->cur - ctxt->input->base;
Daniel Veillard46e370e2000-07-21 20:32:03 +00002872
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002873 ctxt->input->buf->raw = xmlBufferCreate();
2874 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
Daniel Veillard46e370e2000-07-21 20:32:03 +00002875 ctxt->input->length - processed);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002876 ctxt->input->buf->buffer = xmlBufferCreate();
2877
2878 /*
2879 * convert as much as possible of the raw input
2880 * to the parser reading buffer.
2881 */
2882 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2883 ctxt->input->buf->buffer,
2884 ctxt->input->buf->raw);
2885 if (nbchars < 0) {
2886 fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2887 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00002888 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002889
Daniel Veillardcf461992000-03-14 18:30:20 +00002890 /*
2891 * Conversion succeeded, get rid of the old buffer
2892 */
2893 if ((ctxt->input->free != NULL) &&
2894 (ctxt->input->base != NULL))
2895 ctxt->input->free((xmlChar *) ctxt->input->base);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002896 ctxt->input->base =
2897 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +00002898 }
2899 }
2900 } else {
2901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2902 ctxt->sax->error(ctxt->userData,
2903 "xmlSwitchEncoding : no input\n");
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002904 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00002905 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002906 /*
2907 * The parsing is now done in UTF8 natively
2908 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002909 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002910 } else
2911 return(-1);
2912 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002913
Daniel Veillard27d88741999-05-29 11:51:49 +00002914}
2915
2916/************************************************************************
2917 * *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002918 * Commodity functions to handle xmlChars *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002919 * *
2920 ************************************************************************/
2921
Daniel Veillard11e00581998-10-24 18:27:49 +00002922/**
2923 * xmlStrndup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002924 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002925 * @len: the len of @cur
2926 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002927 * a strndup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002928 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002929 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002930 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002931xmlChar *
2932xmlStrndup(const xmlChar *cur, int len) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002933 xmlChar *ret;
2934
2935 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002936 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002937 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00002938 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002939 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002940 return(NULL);
2941 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002942 memcpy(ret, cur, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002943 ret[len] = 0;
2944 return(ret);
2945}
2946
Daniel Veillard11e00581998-10-24 18:27:49 +00002947/**
2948 * xmlStrdup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002949 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002950 *
Daniel Veillardcf461992000-03-14 18:30:20 +00002951 * a strdup for array of xmlChar's. Since they are supposed to be
2952 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2953 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00002954 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002955 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002956 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002957xmlChar *
2958xmlStrdup(const xmlChar *cur) {
2959 const xmlChar *p = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002960
Daniel Veillard10a2c651999-12-12 13:03:50 +00002961 if (cur == NULL) return(NULL);
Daniel Veillardcf461992000-03-14 18:30:20 +00002962 while (*p != 0) p++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002963 return(xmlStrndup(cur, p - cur));
2964}
2965
Daniel Veillard11e00581998-10-24 18:27:49 +00002966/**
2967 * xmlCharStrndup:
2968 * @cur: the input char *
2969 * @len: the len of @cur
2970 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002971 * a strndup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002972 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002973 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002974 */
2975
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002976xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002977xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002978 int i;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002979 xmlChar *ret;
2980
2981 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002982 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002983 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00002984 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002985 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002986 return(NULL);
2987 }
2988 for (i = 0;i < len;i++)
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002989 ret[i] = (xmlChar) cur[i];
Daniel Veillard260a68f1998-08-13 03:39:55 +00002990 ret[len] = 0;
2991 return(ret);
2992}
2993
Daniel Veillard11e00581998-10-24 18:27:49 +00002994/**
2995 * xmlCharStrdup:
2996 * @cur: the input char *
2997 * @len: the len of @cur
2998 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002999 * a strdup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003000 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003001 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003002 */
3003
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003004xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003005xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003006 const char *p = cur;
3007
Daniel Veillard10a2c651999-12-12 13:03:50 +00003008 if (cur == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003009 while (*p != '\0') p++;
3010 return(xmlCharStrndup(cur, p - cur));
3011}
3012
Daniel Veillard11e00581998-10-24 18:27:49 +00003013/**
3014 * xmlStrcmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003015 * @str1: the first xmlChar *
3016 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003017 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003018 * a strcmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003019 *
3020 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00003021 */
3022
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003023int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003024xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003025 register int tmp;
3026
Daniel Veillard10a2c651999-12-12 13:03:50 +00003027 if ((str1 == NULL) && (str2 == NULL)) return(0);
3028 if (str1 == NULL) return(-1);
3029 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003030 do {
3031 tmp = *str1++ - *str2++;
3032 if (tmp != 0) return(tmp);
3033 } while ((*str1 != 0) && (*str2 != 0));
3034 return (*str1 - *str2);
3035}
3036
Daniel Veillard11e00581998-10-24 18:27:49 +00003037/**
3038 * xmlStrncmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003039 * @str1: the first xmlChar *
3040 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003041 * @len: the max comparison length
3042 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003043 * a strncmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003044 *
3045 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00003046 */
3047
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003048int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003049xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003050 register int tmp;
3051
3052 if (len <= 0) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003053 if ((str1 == NULL) && (str2 == NULL)) return(0);
3054 if (str1 == NULL) return(-1);
3055 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003056 do {
3057 tmp = *str1++ - *str2++;
3058 if (tmp != 0) return(tmp);
3059 len--;
3060 if (len <= 0) return(0);
3061 } while ((*str1 != 0) && (*str2 != 0));
3062 return (*str1 - *str2);
3063}
3064
Daniel Veillard11e00581998-10-24 18:27:49 +00003065/**
3066 * xmlStrchr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003067 * @str: the xmlChar * array
3068 * @val: the xmlChar to search
Daniel Veillard11e00581998-10-24 18:27:49 +00003069 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003070 * a strchr for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003071 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003072 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003073 */
3074
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003075const xmlChar *
3076xmlStrchr(const xmlChar *str, xmlChar val) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003077 if (str == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003078 while (*str != 0) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003079 if (*str == val) return((xmlChar *) str);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003080 str++;
3081 }
3082 return(NULL);
3083}
3084
Daniel Veillard11e00581998-10-24 18:27:49 +00003085/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003086 * xmlStrstr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003087 * @str: the xmlChar * array (haystack)
3088 * @val: the xmlChar to search (needle)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003089 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003090 * a strstr for xmlChar's
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003091 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003092 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003093 */
3094
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003095const xmlChar *
3096xmlStrstr(const xmlChar *str, xmlChar *val) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003097 int n;
3098
3099 if (str == NULL) return(NULL);
3100 if (val == NULL) return(NULL);
3101 n = xmlStrlen(val);
3102
3103 if (n == 0) return(str);
3104 while (*str != 0) {
3105 if (*str == *val) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003106 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003107 }
3108 str++;
3109 }
3110 return(NULL);
3111}
3112
3113/**
3114 * xmlStrsub:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003115 * @str: the xmlChar * array (haystack)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003116 * @start: the index of the first char (zero based)
3117 * @len: the length of the substring
3118 *
3119 * Extract a substring of a given string
3120 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003121 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003122 */
3123
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003124xmlChar *
3125xmlStrsub(const xmlChar *str, int start, int len) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003126 int i;
3127
3128 if (str == NULL) return(NULL);
3129 if (start < 0) return(NULL);
3130 if (len < 0) return(NULL);
3131
3132 for (i = 0;i < start;i++) {
3133 if (*str == 0) return(NULL);
3134 str++;
3135 }
3136 if (*str == 0) return(NULL);
3137 return(xmlStrndup(str, len));
3138}
3139
3140/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003141 * xmlStrlen:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003142 * @str: the xmlChar * array
Daniel Veillard11e00581998-10-24 18:27:49 +00003143 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003144 * length of a xmlChar's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00003145 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003146 * Returns the number of xmlChar contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003147 */
3148
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003149int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003150xmlStrlen(const xmlChar *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003151 int len = 0;
3152
3153 if (str == NULL) return(0);
3154 while (*str != 0) {
3155 str++;
3156 len++;
3157 }
3158 return(len);
3159}
3160
Daniel Veillard11e00581998-10-24 18:27:49 +00003161/**
3162 * xmlStrncat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003163 * @cur: the original xmlChar * array
3164 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00003165 * @len: the length of @add
3166 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003167 * a strncat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003168 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003169 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003170 */
3171
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003172xmlChar *
3173xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003174 int size;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003175 xmlChar *ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003176
3177 if ((add == NULL) || (len == 0))
3178 return(cur);
3179 if (cur == NULL)
3180 return(xmlStrndup(add, len));
3181
3182 size = xmlStrlen(cur);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003183 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003184 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00003185 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003186 (size + len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003187 return(cur);
3188 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003189 memcpy(&ret[size], add, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003190 ret[size + len] = 0;
3191 return(ret);
3192}
3193
Daniel Veillard11e00581998-10-24 18:27:49 +00003194/**
3195 * xmlStrcat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003196 * @cur: the original xmlChar * array
3197 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00003198 *
Daniel Veillardcf461992000-03-14 18:30:20 +00003199 * a strcat for array of xmlChar's. Since they are supposed to be
3200 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
3201 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003202 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003203 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003204 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003205xmlChar *
3206xmlStrcat(xmlChar *cur, const xmlChar *add) {
3207 const xmlChar *p = add;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003208
3209 if (add == NULL) return(cur);
3210 if (cur == NULL)
3211 return(xmlStrdup(add));
3212
Daniel Veillardcf461992000-03-14 18:30:20 +00003213 while (*p != 0) p++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003214 return(xmlStrncat(cur, add, p - add));
3215}
3216
3217/************************************************************************
3218 * *
3219 * Commodity functions, cleanup needed ? *
3220 * *
3221 ************************************************************************/
3222
Daniel Veillard11e00581998-10-24 18:27:49 +00003223/**
3224 * areBlanks:
3225 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003226 * @str: a xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003227 * @len: the size of @str
3228 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003229 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00003230 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003231 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003232 */
3233
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003234static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003235 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003236 xmlNodePtr lastChild;
3237
Daniel Veillard83a30e72000-03-02 03:33:32 +00003238 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00003239 * Check for xml:space value.
3240 */
3241 if (*(ctxt->space) == 1)
3242 return(0);
3243
3244 /*
Daniel Veillard83a30e72000-03-02 03:33:32 +00003245 * Check that the string is made of blanks
3246 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003247 for (i = 0;i < len;i++)
3248 if (!(IS_BLANK(str[i]))) return(0);
3249
Daniel Veillard83a30e72000-03-02 03:33:32 +00003250 /*
3251 * Look if the element is mixed content in the Dtd if available
3252 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00003253 if (ctxt->myDoc != NULL) {
3254 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3255 if (ret == 0) return(1);
3256 if (ret == 1) return(0);
3257 }
Daniel Veillard83a30e72000-03-02 03:33:32 +00003258
Daniel Veillardb05deb71999-08-10 19:04:08 +00003259 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00003260 * Otherwise, heuristic :-\
Daniel Veillardb05deb71999-08-10 19:04:08 +00003261 */
Daniel Veillard83a30e72000-03-02 03:33:32 +00003262 if (ctxt->keepBlanks)
3263 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003264 if (RAW != '<') return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00003265 if (ctxt->node == NULL) return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003266 if ((ctxt->node->children == NULL) &&
3267 (RAW == '<') && (NXT(1) == '/')) return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00003268
Daniel Veillard260a68f1998-08-13 03:39:55 +00003269 lastChild = xmlGetLastChild(ctxt->node);
3270 if (lastChild == NULL) {
Daniel Veillard90fb02c2000-03-02 03:46:43 +00003271 if (ctxt->node->content != NULL) return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003272 } else if (xmlNodeIsText(lastChild))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00003273 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003274 else if ((ctxt->node->children != NULL) &&
3275 (xmlNodeIsText(ctxt->node->children)))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00003276 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003277 return(1);
3278}
3279
Daniel Veillard11e00581998-10-24 18:27:49 +00003280/**
3281 * xmlHandleEntity:
3282 * @ctxt: an XML parser context
3283 * @entity: an XML entity pointer.
3284 *
3285 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +00003286 * stream ? When do we just handle that as a set of chars ?
Daniel Veillardb05deb71999-08-10 19:04:08 +00003287 *
3288 * OBSOLETE: to be removed at some point.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003289 */
3290
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003291void
3292xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003293 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +00003294 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003295
3296 if (entity->content == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003297 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003299 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00003300 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003301 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003302 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003303 return;
3304 }
3305 len = xmlStrlen(entity->content);
3306 if (len <= 2) goto handle_as_char;
3307
3308 /*
3309 * Redefine its content as an input stream.
3310 */
Daniel Veillardccb09631998-10-27 06:21:04 +00003311 input = xmlNewEntityInputStream(ctxt, entity);
3312 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003313 return;
3314
3315handle_as_char:
3316 /*
3317 * Just handle the content as a set of chars.
3318 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003319 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3320 (ctxt->sax->characters != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003321 ctxt->sax->characters(ctxt->userData, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003322
3323}
3324
3325/*
 * Forward definition for recursive behaviour.
3327 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00003328void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3329void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003330
3331/************************************************************************
3332 * *
3333 * Extra stuff for namespace support *
3334 * Relates to http://www.w3.org/TR/WD-xml-names *
3335 * *
3336 ************************************************************************/
3337
Daniel Veillard11e00581998-10-24 18:27:49 +00003338/**
3339 * xmlNamespaceParseNCName:
3340 * @ctxt: an XML parser context
3341 *
3342 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003343 *
3344 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3345 *
3346 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3347 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +00003348 *
3349 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003350 */
3351
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003352xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003353xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003354 xmlChar buf[XML_MAX_NAMELEN + 5];
3355 int len = 0, l;
3356 int cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003357
Daniel Veillardcf461992000-03-14 18:30:20 +00003358 /* load first the value of the char !!! */
3359 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003360
Daniel Veillardcf461992000-03-14 18:30:20 +00003361 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3362 (cur == '.') || (cur == '-') ||
3363 (cur == '_') ||
3364 (IS_COMBINING(cur)) ||
3365 (IS_EXTENDER(cur))) {
3366 COPY_BUF(l,buf,len,cur);
3367 NEXTL(l);
3368 cur = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003369 if (len >= XML_MAX_NAMELEN) {
3370 fprintf(stderr,
3371 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003372 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3373 (cur == '.') || (cur == '-') ||
3374 (cur == '_') ||
3375 (IS_COMBINING(cur)) ||
3376 (IS_EXTENDER(cur))) {
3377 NEXTL(l);
3378 cur = CUR_CHAR(l);
3379 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003380 break;
3381 }
3382 }
3383 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003384}
3385
Daniel Veillard11e00581998-10-24 18:27:49 +00003386/**
3387 * xmlNamespaceParseQName:
3388 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003389 * @prefix: a xmlChar **
Daniel Veillard11e00581998-10-24 18:27:49 +00003390 *
3391 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003392 *
3393 * [NS 5] QName ::= (Prefix ':')? LocalPart
3394 *
3395 * [NS 6] Prefix ::= NCName
3396 *
3397 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +00003398 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003399 * Returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +00003400 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003401 */
3402
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003403xmlChar *
3404xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3405 xmlChar *ret = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003406
3407 *prefix = NULL;
3408 ret = xmlNamespaceParseNCName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003409 if (RAW == ':') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003410 *prefix = ret;
3411 NEXT;
3412 ret = xmlNamespaceParseNCName(ctxt);
3413 }
3414
3415 return(ret);
3416}
3417
Daniel Veillard11e00581998-10-24 18:27:49 +00003418/**
Daniel Veillard517752b1999-04-05 12:20:10 +00003419 * xmlSplitQName:
Daniel Veillardcf461992000-03-14 18:30:20 +00003420 * @ctxt: an XML parser context
Daniel Veillard517752b1999-04-05 12:20:10 +00003421 * @name: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003422 * @prefix: a xmlChar **
Daniel Veillard517752b1999-04-05 12:20:10 +00003423 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003424 * parse an UTF8 encoded XML qualified name string
Daniel Veillard517752b1999-04-05 12:20:10 +00003425 *
3426 * [NS 5] QName ::= (Prefix ':')? LocalPart
3427 *
3428 * [NS 6] Prefix ::= NCName
3429 *
3430 * [NS 7] LocalPart ::= NCName
3431 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003432 * Returns the local part, and prefix is updated
Daniel Veillard517752b1999-04-05 12:20:10 +00003433 * to get the Prefix if any.
3434 */
3435
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003436xmlChar *
Daniel Veillardcf461992000-03-14 18:30:20 +00003437xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3438 xmlChar buf[XML_MAX_NAMELEN + 5];
3439 int len = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003440 xmlChar *ret = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003441 const xmlChar *cur = name;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003442 int c;
Daniel Veillard517752b1999-04-05 12:20:10 +00003443
3444 *prefix = NULL;
Daniel Veillardb96e6431999-08-29 21:02:19 +00003445
3446 /* xml: prefix is not really a namespace */
3447 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3448 (cur[2] == 'l') && (cur[3] == ':'))
3449 return(xmlStrdup(name));
3450
Daniel Veillardcf461992000-03-14 18:30:20 +00003451 /* nasty but valid */
3452 if (cur[0] == ':')
3453 return(xmlStrdup(name));
Daniel Veillard517752b1999-04-05 12:20:10 +00003454
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003455 c = *cur++;
3456 while ((c != 0) && (c != ':')) {
3457 buf[len++] = c;
3458 c = *cur++;
Daniel Veillardcf461992000-03-14 18:30:20 +00003459 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003460
Daniel Veillardcf461992000-03-14 18:30:20 +00003461 ret = xmlStrndup(buf, len);
Daniel Veillard517752b1999-04-05 12:20:10 +00003462
Daniel Veillardcf461992000-03-14 18:30:20 +00003463 if (c == ':') {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003464 c = *cur++;
3465 if (c == 0) return(ret);
Daniel Veillard517752b1999-04-05 12:20:10 +00003466 *prefix = ret;
Daniel Veillardcf461992000-03-14 18:30:20 +00003467 len = 0;
Daniel Veillard517752b1999-04-05 12:20:10 +00003468
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003469 while (c != 0) {
3470 buf[len++] = c;
3471 c = *cur++;
Daniel Veillardcf461992000-03-14 18:30:20 +00003472 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003473
Daniel Veillardcf461992000-03-14 18:30:20 +00003474 ret = xmlStrndup(buf, len);
Daniel Veillard517752b1999-04-05 12:20:10 +00003475 }
3476
3477 return(ret);
3478}
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003479
Daniel Veillard517752b1999-04-05 12:20:10 +00003480/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003481 * xmlNamespaceParseNSDef:
3482 * @ctxt: an XML parser context
3483 *
3484 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003485 *
3486 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3487 *
3488 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +00003489 *
3490 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003491 */
3492
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003493xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003494xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003495 xmlChar *name = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003496
Daniel Veillardcf461992000-03-14 18:30:20 +00003497 if ((RAW == 'x') && (NXT(1) == 'm') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003498 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3499 (NXT(4) == 's')) {
3500 SKIP(5);
Daniel Veillardcf461992000-03-14 18:30:20 +00003501 if (RAW == ':') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003502 NEXT;
3503 name = xmlNamespaceParseNCName(ctxt);
3504 }
3505 }
3506 return(name);
3507}
3508
Daniel Veillard11e00581998-10-24 18:27:49 +00003509/**
3510 * xmlParseQuotedString:
3511 * @ctxt: an XML parser context
3512 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003513 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillardb05deb71999-08-10 19:04:08 +00003514 * To be removed at next drop of binary compatibility
Daniel Veillard1e346af1999-02-22 10:33:01 +00003515 *
3516 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003517 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003518xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003519xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003520 xmlChar *buf = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003521 int len = 0,l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003522 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003523 int c;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003524
Daniel Veillard10a2c651999-12-12 13:03:50 +00003525 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3526 if (buf == NULL) {
3527 fprintf(stderr, "malloc of %d byte failed\n", size);
3528 return(NULL);
3529 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003530 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003531 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003532 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003533 while (IS_CHAR(c) && (c != '"')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003534 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003535 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003536 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003537 if (buf == NULL) {
3538 fprintf(stderr, "realloc of %d byte failed\n", size);
3539 return(NULL);
3540 }
3541 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003542 COPY_BUF(l,buf,len,c);
3543 NEXTL(l);
3544 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003545 }
3546 if (c != '"') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003547 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003549 ctxt->sax->error(ctxt->userData,
3550 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003551 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003552 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003553 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003554 NEXT;
3555 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003556 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00003557 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003558 c = CUR;
3559 while (IS_CHAR(c) && (c != '\'')) {
3560 if (len + 1 >= size) {
3561 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003562 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003563 if (buf == NULL) {
3564 fprintf(stderr, "realloc of %d byte failed\n", size);
3565 return(NULL);
3566 }
3567 }
3568 buf[len++] = c;
3569 NEXT;
3570 c = CUR;
3571 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003572 if (RAW != '\'') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003573 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003575 ctxt->sax->error(ctxt->userData,
3576 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003577 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003578 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003579 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003580 NEXT;
3581 }
3582 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003583 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003584}
3585
Daniel Veillard11e00581998-10-24 18:27:49 +00003586/**
3587 * xmlParseNamespace:
3588 * @ctxt: an XML parser context
3589 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003590 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3591 *
3592 * This is what the older xml-name Working Draft specified, a bunch of
3593 * other stuff may still rely on it, so support is still here as
Daniel Veillard51e3b151999-11-12 17:02:31 +00003594 * if it was declared on the root of the Tree:-(
Daniel Veillardb05deb71999-08-10 19:04:08 +00003595 *
3596 * To be removed at next drop of binary compatibility
Daniel Veillard260a68f1998-08-13 03:39:55 +00003597 */
3598
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003599void
3600xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003601 xmlChar *href = NULL;
3602 xmlChar *prefix = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003603 int garbage = 0;
3604
3605 /*
3606 * We just skipped "namespace" or "xml:namespace"
3607 */
3608 SKIP_BLANKS;
3609
Daniel Veillardcf461992000-03-14 18:30:20 +00003610 while (IS_CHAR(RAW) && (RAW != '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003611 /*
3612 * We can have "ns" or "prefix" attributes
3613 * Old encoding as 'href' or 'AS' attributes is still supported
3614 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003615 if ((RAW == 'n') && (NXT(1) == 's')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003616 garbage = 0;
3617 SKIP(2);
3618 SKIP_BLANKS;
3619
Daniel Veillardcf461992000-03-14 18:30:20 +00003620 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003621 NEXT;
3622 SKIP_BLANKS;
3623
3624 href = xmlParseQuotedString(ctxt);
3625 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003626 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003627 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3628 garbage = 0;
3629 SKIP(4);
3630 SKIP_BLANKS;
3631
Daniel Veillardcf461992000-03-14 18:30:20 +00003632 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003633 NEXT;
3634 SKIP_BLANKS;
3635
3636 href = xmlParseQuotedString(ctxt);
3637 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003638 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003639 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3640 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3641 garbage = 0;
3642 SKIP(6);
3643 SKIP_BLANKS;
3644
Daniel Veillardcf461992000-03-14 18:30:20 +00003645 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003646 NEXT;
3647 SKIP_BLANKS;
3648
3649 prefix = xmlParseQuotedString(ctxt);
3650 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003651 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003652 garbage = 0;
3653 SKIP(2);
3654 SKIP_BLANKS;
3655
Daniel Veillardcf461992000-03-14 18:30:20 +00003656 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003657 NEXT;
3658 SKIP_BLANKS;
3659
3660 prefix = xmlParseQuotedString(ctxt);
3661 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003662 } else if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003663 garbage = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003664 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003665 } else {
3666 /*
3667 * Found garbage when parsing the namespace
3668 */
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003669 if (!garbage) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003671 ctxt->sax->error(ctxt->userData,
3672 "xmlParseNamespace found garbage\n");
3673 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003674 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003675 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003676 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003677 NEXT;
3678 }
3679 }
3680
3681 MOVETO_ENDTAG(CUR_PTR);
3682 NEXT;
3683
3684 /*
3685 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003686 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +00003687 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003688 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
Daniel Veillard517752b1999-04-05 12:20:10 +00003689 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003690
Daniel Veillard6454aec1999-09-02 22:04:43 +00003691 if (prefix != NULL) xmlFree(prefix);
3692 if (href != NULL) xmlFree(href);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003693}
3694
3695/************************************************************************
3696 * *
3697 * The parser itself *
3698 * Relates to http://www.w3.org/TR/REC-xml *
3699 * *
3700 ************************************************************************/
3701
Daniel Veillard11e00581998-10-24 18:27:49 +00003702/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00003703 * xmlScanName:
3704 * @ctxt: an XML parser context
3705 *
3706 * Trickery: parse an XML name but without consuming the input flow
3707 * Needed for rollback cases.
3708 *
3709 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3710 * CombiningChar | Extender
3711 *
3712 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3713 *
3714 * [6] Names ::= Name (S Name)*
3715 *
3716 * Returns the Name parsed or NULL
3717 */
3718
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003719xmlChar *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003720xmlScanName(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003721 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillardb05deb71999-08-10 19:04:08 +00003722 int len = 0;
3723
3724 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003725 if (!IS_LETTER(RAW) && (RAW != '_') &&
3726 (RAW != ':')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003727 return(NULL);
3728 }
3729
3730 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3731 (NXT(len) == '.') || (NXT(len) == '-') ||
3732 (NXT(len) == '_') || (NXT(len) == ':') ||
3733 (IS_COMBINING(NXT(len))) ||
3734 (IS_EXTENDER(NXT(len)))) {
3735 buf[len] = NXT(len);
3736 len++;
3737 if (len >= XML_MAX_NAMELEN) {
3738 fprintf(stderr,
3739 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3740 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3741 (NXT(len) == '.') || (NXT(len) == '-') ||
3742 (NXT(len) == '_') || (NXT(len) == ':') ||
3743 (IS_COMBINING(NXT(len))) ||
3744 (IS_EXTENDER(NXT(len))))
3745 len++;
3746 break;
3747 }
3748 }
3749 return(xmlStrndup(buf, len));
3750}
3751
3752/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003753 * xmlParseName:
3754 * @ctxt: an XML parser context
3755 *
3756 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003757 *
3758 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3759 * CombiningChar | Extender
3760 *
3761 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3762 *
3763 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003764 *
3765 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003766 */
3767
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003768xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003769xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003770 xmlChar buf[XML_MAX_NAMELEN + 5];
3771 int len = 0, l;
3772 int c;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003773
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003774 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003775 c = CUR_CHAR(l);
Daniel Veillard5e873c42000-04-12 13:27:38 +00003776 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3777 (!IS_LETTER(c) && (c != '_') &&
3778 (c != ':'))) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003779 return(NULL);
3780 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003781
Daniel Veillard5e873c42000-04-12 13:27:38 +00003782 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3783 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3784 (c == '.') || (c == '-') ||
3785 (c == '_') || (c == ':') ||
3786 (IS_COMBINING(c)) ||
3787 (IS_EXTENDER(c)))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003788 COPY_BUF(l,buf,len,c);
3789 NEXTL(l);
3790 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003791 if (len >= XML_MAX_NAMELEN) {
3792 fprintf(stderr,
3793 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3795 (c == '.') || (c == '-') ||
3796 (c == '_') || (c == ':') ||
3797 (IS_COMBINING(c)) ||
3798 (IS_EXTENDER(c))) {
3799 NEXTL(l);
3800 c = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003801 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003802 break;
3803 }
3804 }
3805 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003806}
3807
Daniel Veillard11e00581998-10-24 18:27:49 +00003808/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00003809 * xmlParseStringName:
3810 * @ctxt: an XML parser context
3811 * @str: a pointer to an index in the string
3812 *
3813 * parse an XML name.
3814 *
3815 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3816 * CombiningChar | Extender
3817 *
3818 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3819 *
3820 * [6] Names ::= Name (S Name)*
3821 *
3822 * Returns the Name parsed or NULL. The str pointer
3823 * is updated to the current location in the string.
3824 */
3825
3826xmlChar *
3827xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003828 xmlChar buf[XML_MAX_NAMELEN + 5];
3829 const xmlChar *cur = *str;
3830 int len = 0, l;
3831 int c;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003832
Daniel Veillardcf461992000-03-14 18:30:20 +00003833 c = CUR_SCHAR(cur, l);
3834 if (!IS_LETTER(c) && (c != '_') &&
3835 (c != ':')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003836 return(NULL);
3837 }
3838
Daniel Veillardcf461992000-03-14 18:30:20 +00003839 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3840 (c == '.') || (c == '-') ||
3841 (c == '_') || (c == ':') ||
3842 (IS_COMBINING(c)) ||
3843 (IS_EXTENDER(c))) {
3844 COPY_BUF(l,buf,len,c);
3845 cur += l;
3846 c = CUR_SCHAR(cur, l);
3847 if (len >= XML_MAX_NAMELEN) {
3848 fprintf(stderr,
3849 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3850 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3851 (c == '.') || (c == '-') ||
3852 (c == '_') || (c == ':') ||
3853 (IS_COMBINING(c)) ||
3854 (IS_EXTENDER(c))) {
3855 cur += l;
3856 c = CUR_SCHAR(cur, l);
3857 }
3858 break;
3859 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003860 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003861 *str = cur;
3862 return(xmlStrndup(buf, len));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003863}
3864
3865/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003866 * xmlParseNmtoken:
3867 * @ctxt: an XML parser context
3868 *
3869 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003870 *
3871 * [7] Nmtoken ::= (NameChar)+
3872 *
3873 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003874 *
3875 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003876 */
3877
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003878xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003879xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003880 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003881 int len = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003882 int c,l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003883
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003884 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003885 c = CUR_CHAR(l);
3886 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3887 (c == '.') || (c == '-') ||
3888 (c == '_') || (c == ':') ||
3889 (IS_COMBINING(c)) ||
3890 (IS_EXTENDER(c))) {
3891 COPY_BUF(l,buf,len,c);
3892 NEXTL(l);
3893 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003894 if (len >= XML_MAX_NAMELEN) {
3895 fprintf(stderr,
3896 "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003897 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3898 (c == '.') || (c == '-') ||
3899 (c == '_') || (c == ':') ||
3900 (IS_COMBINING(c)) ||
3901 (IS_EXTENDER(c))) {
3902 NEXTL(l);
3903 c = CUR_CHAR(l);
3904 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003905 break;
3906 }
3907 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003908 if (len == 0)
3909 return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003910 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003911}
3912
Daniel Veillard11e00581998-10-24 18:27:49 +00003913/**
3914 * xmlParseEntityValue:
3915 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00003916 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00003917 *
3918 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003919 *
3920 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3921 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00003922 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00003923 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003924 */
3925
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003926xmlChar *
3927xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003928 xmlChar *buf = NULL;
3929 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003930 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003931 int c, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003932 xmlChar stop;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003933 xmlChar *ret = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003934 const xmlChar *cur = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003935 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003936
Daniel Veillardcf461992000-03-14 18:30:20 +00003937 if (RAW == '"') stop = '"';
3938 else if (RAW == '\'') stop = '\'';
Daniel Veillard10a2c651999-12-12 13:03:50 +00003939 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003940 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00003942 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003943 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003944 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003945 return(NULL);
3946 }
3947 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3948 if (buf == NULL) {
3949 fprintf(stderr, "malloc of %d byte failed\n", size);
3950 return(NULL);
3951 }
3952
3953 /*
3954 * The content of the entity definition is copied in a buffer.
3955 */
3956
3957 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3958 input = ctxt->input;
3959 GROW;
3960 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003961 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003962 /*
3963 * NOTE: 4.4.5 Included in Literal
3964 * When a parameter entity reference appears in a literal entity
3965 * value, ... a single or double quote character in the replacement
3966 * text is always treated as a normal data character and will not
3967 * terminate the literal.
3968 * In practice it means we stop the loop only when back at parsing
3969 * the initial entity and the quote is found
3970 */
3971 while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003972 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003973 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003974 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003975 if (buf == NULL) {
3976 fprintf(stderr, "realloc of %d byte failed\n", size);
3977 return(NULL);
3978 }
3979 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003980 COPY_BUF(l,buf,len,c);
3981 NEXTL(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003982 /*
3983 * Pop-up of finished entities.
3984 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003985 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003986 xmlPopInput(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003987
3988 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003989 if (c == 0) {
3990 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003991 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003992 }
3993 }
3994 buf[len] = 0;
3995
3996 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00003997 * Raise problem w.r.t. '&' and '%' being used in non-entities
3998 * reference constructs. Note Charref will be handled in
3999 * xmlStringDecodeEntities()
4000 */
4001 cur = buf;
4002 while (*cur != 0) {
4003 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
4004 xmlChar *name;
4005 xmlChar tmp = *cur;
4006
4007 cur++;
4008 name = xmlParseStringName(ctxt, &cur);
4009 if ((name == NULL) || (*cur != ';')) {
4010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4011 ctxt->sax->error(ctxt->userData,
4012 "EntityValue: '%c' forbidden except for entities references\n",
4013 tmp);
4014 ctxt->wellFormed = 0;
4015 ctxt->disableSAX = 1;
4016 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
4017 }
4018 if ((ctxt->inSubset == 1) && (tmp == '%')) {
4019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4020 ctxt->sax->error(ctxt->userData,
4021 "EntityValue: PEReferences forbidden in internal subset\n",
4022 tmp);
4023 ctxt->wellFormed = 0;
4024 ctxt->disableSAX = 1;
4025 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
4026 }
4027 if (name != NULL)
4028 xmlFree(name);
4029 }
4030 cur++;
4031 }
4032
4033 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00004034 * Then PEReference entities are substituted.
4035 */
4036 if (c != stop) {
4037 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
4038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4039 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
4040 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004041 ctxt->disableSAX = 1;
4042 xmlFree(buf);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004043 } else {
4044 NEXT;
4045 /*
4046 * NOTE: 4.4.7 Bypassed
4047 * When a general entity reference appears in the EntityValue in
4048 * an entity declaration, it is bypassed and left as is.
Daniel Veillardcf461992000-03-14 18:30:20 +00004049 * so XML_SUBSTITUTE_REF is not set here.
Daniel Veillard10a2c651999-12-12 13:03:50 +00004050 */
4051 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
4052 0, 0, 0);
4053 if (orig != NULL)
4054 *orig = buf;
4055 else
4056 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004057 }
4058
4059 return(ret);
4060}
4061
Daniel Veillard11e00581998-10-24 18:27:49 +00004062/**
4063 * xmlParseAttValue:
4064 * @ctxt: an XML parser context
4065 *
4066 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00004067 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00004068 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00004069 *
4070 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4071 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00004072 *
Daniel Veillard7f858501999-11-17 17:32:38 +00004073 * 3.3.3 Attribute-Value Normalization:
4074 * Before the value of an attribute is passed to the application or
4075 * checked for validity, the XML processor must normalize it as follows:
4076 * - a character reference is processed by appending the referenced
4077 * character to the attribute value
4078 * - an entity reference is processed by recursively processing the
4079 * replacement text of the entity
4080 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4081 * appending #x20 to the normalized value, except that only a single
4082 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4083 * parsed entity or the literal entity value of an internal parsed entity
4084 * - other characters are processed by appending them to the normalized value
Daniel Veillard07136651999-11-18 14:02:05 +00004085 * If the declared value is not CDATA, then the XML processor must further
4086 * process the normalized attribute value by discarding any leading and
4087 * trailing space (#x20) characters, and by replacing sequences of space
4088 * (#x20) characters by a single space (#x20) character.
4089 * All attributes for which no declaration has been read should be treated
4090 * by a non-validating parser as if declared CDATA.
Daniel Veillard7f858501999-11-17 17:32:38 +00004091 *
4092 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004093 */
4094
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004095xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004096xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004097 xmlChar limit = 0;
Daniel Veillardbe803962000-06-28 23:40:59 +00004098 xmlChar *buf = NULL;
4099 int len = 0;
4100 int buf_size = 0;
4101 int c, l;
Daniel Veillard7f858501999-11-17 17:32:38 +00004102 xmlChar *current = NULL;
4103 xmlEntityPtr ent;
Daniel Veillard7f858501999-11-17 17:32:38 +00004104
Daniel Veillard260a68f1998-08-13 03:39:55 +00004105
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004106 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004107 if (NXT(0) == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004108 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard7f858501999-11-17 17:32:38 +00004109 limit = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00004110 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004111 } else if (NXT(0) == '\'') {
Daniel Veillard7f858501999-11-17 17:32:38 +00004112 limit = '\'';
Daniel Veillardb05deb71999-08-10 19:04:08 +00004113 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004114 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004115 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004116 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004118 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004119 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004120 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00004121 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004122 }
4123
Daniel Veillard7f858501999-11-17 17:32:38 +00004124 /*
4125 * allocate a translation buffer.
4126 */
Daniel Veillardbe803962000-06-28 23:40:59 +00004127 buf_size = XML_PARSER_BUFFER_SIZE;
4128 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
4129 if (buf == NULL) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004130 perror("xmlParseAttValue: malloc failed");
4131 return(NULL);
4132 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004133
4134 /*
4135 * Ok loop until we reach one of the ending char or a size limit.
4136 */
Daniel Veillardbe803962000-06-28 23:40:59 +00004137 c = CUR_CHAR(l);
4138 while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
4139 if (c == 0) break;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004140 if (ctxt->token == '&') {
4141 static xmlChar buffer[6] = "&#38;";
4142
4143 if (len > buf_size - 10) {
4144 growBuffer(buf);
4145 }
4146 current = &buffer[0];
4147 while (*current != 0) {
4148 buf[len++] = *current++;
4149 }
4150 ctxt->token = 0;
4151 } else if ((c == '&') && (NXT(1) == '#')) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004152 int val = xmlParseCharRef(ctxt);
Daniel Veillardbe803962000-06-28 23:40:59 +00004153 COPY_BUF(l,buf,len,val);
4154 NEXTL(l);
4155 } else if (c == '&') {
Daniel Veillard7f858501999-11-17 17:32:38 +00004156 ent = xmlParseEntityRef(ctxt);
4157 if ((ent != NULL) &&
4158 (ctxt->replaceEntities != 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004159 xmlChar *rep;
Daniel Veillard7f858501999-11-17 17:32:38 +00004160
Daniel Veillardcf461992000-03-14 18:30:20 +00004161 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4162 rep = xmlStringDecodeEntities(ctxt, ent->content,
4163 XML_SUBSTITUTE_REF, 0, 0, 0);
4164 if (rep != NULL) {
4165 current = rep;
4166 while (*current != 0) {
Daniel Veillardbe803962000-06-28 23:40:59 +00004167 buf[len++] = *current++;
4168 if (len > buf_size - 10) {
4169 growBuffer(buf);
Daniel Veillardcf461992000-03-14 18:30:20 +00004170 }
4171 }
4172 xmlFree(rep);
Daniel Veillard7f858501999-11-17 17:32:38 +00004173 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004174 } else {
4175 if (ent->content != NULL)
Daniel Veillardbe803962000-06-28 23:40:59 +00004176 buf[len++] = ent->content[0];
Daniel Veillard7f858501999-11-17 17:32:38 +00004177 }
4178 } else if (ent != NULL) {
4179 int i = xmlStrlen(ent->name);
4180 const xmlChar *cur = ent->name;
4181
Daniel Veillardcf461992000-03-14 18:30:20 +00004182 /*
4183 * This may look absurd but is needed to detect
4184 * entities problems
4185 */
Daniel Veillard87b95392000-08-12 21:12:04 +00004186 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4187 (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004188 xmlChar *rep;
4189 rep = xmlStringDecodeEntities(ctxt, ent->content,
4190 XML_SUBSTITUTE_REF, 0, 0, 0);
4191 if (rep != NULL)
4192 xmlFree(rep);
4193 }
4194
4195 /*
4196 * Just output the reference
4197 */
Daniel Veillardbe803962000-06-28 23:40:59 +00004198 buf[len++] = '&';
4199 if (len > buf_size - i - 10) {
4200 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00004201 }
4202 for (;i > 0;i--)
Daniel Veillardbe803962000-06-28 23:40:59 +00004203 buf[len++] = *cur++;
4204 buf[len++] = ';';
Daniel Veillard7f858501999-11-17 17:32:38 +00004205 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004206 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00004207 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4208 COPY_BUF(l,buf,len,0x20);
4209 if (len > buf_size - 10) {
4210 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00004211 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004212 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00004213 COPY_BUF(l,buf,len,c);
4214 if (len > buf_size - 10) {
4215 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00004216 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004217 }
Daniel Veillardbe803962000-06-28 23:40:59 +00004218 NEXTL(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00004219 }
Daniel Veillardbe803962000-06-28 23:40:59 +00004220 GROW;
4221 c = CUR_CHAR(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00004222 }
Daniel Veillardbe803962000-06-28 23:40:59 +00004223 buf[len++] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004224 if (RAW == '<') {
Daniel Veillard7f858501999-11-17 17:32:38 +00004225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4226 ctxt->sax->error(ctxt->userData,
4227 "Unescaped '<' not allowed in attributes values\n");
4228 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
4229 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004230 ctxt->disableSAX = 1;
4231 } else if (RAW != limit) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4233 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
4234 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
4235 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004236 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00004237 } else
4238 NEXT;
Daniel Veillardbe803962000-06-28 23:40:59 +00004239 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004240}
4241
Daniel Veillard11e00581998-10-24 18:27:49 +00004242/**
4243 * xmlParseSystemLiteral:
4244 * @ctxt: an XML parser context
4245 *
4246 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00004247 *
4248 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00004249 *
4250 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004251 */
4252
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004253xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004254xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004255 xmlChar *buf = NULL;
4256 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004257 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004258 int cur, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004259 xmlChar stop;
Daniel Veillardcf461992000-03-14 18:30:20 +00004260 int state = ctxt->instate;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004261
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004262 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004263 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004264 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004265 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00004266 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004267 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004268 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00004269 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004271 ctxt->sax->error(ctxt->userData,
4272 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004273 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004274 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004275 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004276 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004277 }
4278
Daniel Veillard10a2c651999-12-12 13:03:50 +00004279 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4280 if (buf == NULL) {
4281 fprintf(stderr, "malloc of %d byte failed\n", size);
4282 return(NULL);
4283 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004284 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4285 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004286 while ((IS_CHAR(cur)) && (cur != stop)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004287 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004288 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004289 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004290 if (buf == NULL) {
4291 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004292 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004293 return(NULL);
4294 }
4295 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004296 COPY_BUF(l,buf,len,cur);
4297 NEXTL(l);
4298 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004299 if (cur == 0) {
4300 GROW;
4301 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004302 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004303 }
4304 }
4305 buf[len] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004306 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004307 if (!IS_CHAR(cur)) {
4308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4309 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4310 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4311 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004312 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004313 } else {
4314 NEXT;
4315 }
4316 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004317}
4318
Daniel Veillard11e00581998-10-24 18:27:49 +00004319/**
4320 * xmlParsePubidLiteral:
4321 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00004322 *
Daniel Veillard11e00581998-10-24 18:27:49 +00004323 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00004324 *
4325 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4326 *
4327 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004328 */
4329
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004330xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004331xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004332 xmlChar *buf = NULL;
4333 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004334 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004335 xmlChar cur;
4336 xmlChar stop;
Daniel Veillard6077d031999-10-09 09:11:45 +00004337
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004338 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004339 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004340 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004341 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00004342 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004343 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004344 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00004345 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004347 ctxt->sax->error(ctxt->userData,
4348 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004349 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004350 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004351 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004352 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004353 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004354 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4355 if (buf == NULL) {
4356 fprintf(stderr, "malloc of %d byte failed\n", size);
4357 return(NULL);
4358 }
4359 cur = CUR;
4360 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4361 if (len + 1 >= size) {
4362 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004363 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004364 if (buf == NULL) {
4365 fprintf(stderr, "realloc of %d byte failed\n", size);
4366 return(NULL);
4367 }
4368 }
4369 buf[len++] = cur;
4370 NEXT;
4371 cur = CUR;
4372 if (cur == 0) {
4373 GROW;
4374 SHRINK;
4375 cur = CUR;
4376 }
4377 }
4378 buf[len] = 0;
4379 if (cur != stop) {
4380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4381 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4382 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4383 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004384 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004385 } else {
4386 NEXT;
4387 }
4388 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004389}
4390
Daniel Veillard11e00581998-10-24 18:27:49 +00004391/**
4392 * xmlParseCharData:
4393 * @ctxt: an XML parser context
4394 * @cdata: int indicating whether we are within a CDATA section
4395 *
4396 * parse a CharData section.
4397 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004398 *
Daniel Veillardcf461992000-03-14 18:30:20 +00004399 * The right angle bracket (>) may be represented using the string "&gt;",
4400 * and must, for compatibility, be escaped using "&gt;" or a character
4401 * reference when it appears in the string "]]>" in content, when that
4402 * string is not marking the end of a CDATA section.
4403 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004404 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4405 */
4406
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004407void
4408xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004409 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004410 int nbchar = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004411 int cur, l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004412
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004413 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004414 cur = CUR_CHAR(l);
Daniel Veillard5e873c42000-04-12 13:27:38 +00004415 while (((cur != '<') || (ctxt->token == '<')) &&
4416 ((cur != '&') || (ctxt->token == '&')) &&
4417 (IS_CHAR(cur))) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004418 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004419 (NXT(2) == '>')) {
4420 if (cdata) break;
4421 else {
4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00004423 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004424 "Sequence ']]>' not allowed in content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004425 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
Daniel Veillardcf461992000-03-14 18:30:20 +00004426 /* Should this be relaxed ??? I see a "must here */
4427 ctxt->wellFormed = 0;
4428 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004429 }
4430 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004431 COPY_BUF(l,buf,nbchar,cur);
4432 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004433 /*
4434 * Ok the segment is to be consumed as chars.
4435 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004436 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004437 if (areBlanks(ctxt, buf, nbchar)) {
4438 if (ctxt->sax->ignorableWhitespace != NULL)
4439 ctxt->sax->ignorableWhitespace(ctxt->userData,
4440 buf, nbchar);
4441 } else {
4442 if (ctxt->sax->characters != NULL)
4443 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4444 }
4445 }
4446 nbchar = 0;
4447 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004448 NEXTL(l);
4449 cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004450 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004451 if (nbchar != 0) {
4452 /*
4453 * Ok the segment is to be consumed as chars.
4454 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004455 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004456 if (areBlanks(ctxt, buf, nbchar)) {
4457 if (ctxt->sax->ignorableWhitespace != NULL)
4458 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4459 } else {
4460 if (ctxt->sax->characters != NULL)
4461 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4462 }
4463 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004464 }
4465}
4466
Daniel Veillard11e00581998-10-24 18:27:49 +00004467/**
4468 * xmlParseExternalID:
4469 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004470 * @publicID: a xmlChar** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00004471 * @strict: indicate whether we should restrict parsing to only
4472 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00004473 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004474 * Parse an External ID or a Public ID
4475 *
4476 * NOTE: Productions [75] and [83] interract badly since [75] can generate
4477 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00004478 *
4479 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4480 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00004481 *
4482 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4483 *
4484 * Returns the function returns SystemLiteral and in the second
4485 * case publicID receives PubidLiteral, is strict is off
4486 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004487 */
4488
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004489xmlChar *
4490xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4491 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004492
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004493 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004494 if ((RAW == 'S') && (NXT(1) == 'Y') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004495 (NXT(2) == 'S') && (NXT(3) == 'T') &&
4496 (NXT(4) == 'E') && (NXT(5) == 'M')) {
4497 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004498 if (!IS_BLANK(CUR)) {
4499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004500 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004501 "Space required after 'SYSTEM'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004502 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004503 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004504 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004505 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004506 SKIP_BLANKS;
4507 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004508 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004510 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004511 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004512 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004513 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004514 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004515 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004516 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004517 (NXT(2) == 'B') && (NXT(3) == 'L') &&
4518 (NXT(4) == 'I') && (NXT(5) == 'C')) {
4519 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004520 if (!IS_BLANK(CUR)) {
4521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004522 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004523 "Space required after 'PUBLIC'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004524 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004525 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004526 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004527 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004528 SKIP_BLANKS;
4529 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004530 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004532 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004533 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004534 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004535 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004536 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004537 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00004538 if (strict) {
4539 /*
4540 * We don't handle [83] so "S SystemLiteral" is required.
4541 */
4542 if (!IS_BLANK(CUR)) {
4543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004544 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004545 "Space required after the Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004546 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004547 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004548 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004549 }
4550 } else {
4551 /*
4552 * We handle [83] so we return immediately, if
4553 * "S SystemLiteral" is not detected. From a purely parsing
4554 * point of view that's a nice mess.
4555 */
Daniel Veillard10a2c651999-12-12 13:03:50 +00004556 const xmlChar *ptr;
4557 GROW;
4558
4559 ptr = CUR_PTR;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004560 if (!IS_BLANK(*ptr)) return(NULL);
4561
4562 while (IS_BLANK(*ptr)) ptr++;
Daniel Veillardcf461992000-03-14 18:30:20 +00004563 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004564 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004565 SKIP_BLANKS;
4566 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004567 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004569 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004570 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004571 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004572 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004573 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004574 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004575 }
4576 return(URI);
4577}
4578
Daniel Veillard11e00581998-10-24 18:27:49 +00004579/**
4580 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00004581 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004582 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004583 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00004584 * The spec says that "For compatibility, the string "--" (double-hyphen)
4585 * must not occur within comments. "
4586 *
4587 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4588 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004589void
Daniel Veillardb96e6431999-08-29 21:02:19 +00004590xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004591 xmlChar *buf = NULL;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004592 int len;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004593 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004594 int q, ql;
4595 int r, rl;
4596 int cur, l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004597 xmlParserInputState state;
Daniel Veillardcf461992000-03-14 18:30:20 +00004598 xmlParserInputPtr input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004599
4600 /*
4601 * Check that there is a comment right here.
4602 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004603 if ((RAW != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00004604 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004605
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004606 state = ctxt->instate;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004607 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004608 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004609 SKIP(4);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004610 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4611 if (buf == NULL) {
4612 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004613 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004614 return;
4615 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004616 q = CUR_CHAR(ql);
4617 NEXTL(ql);
4618 r = CUR_CHAR(rl);
4619 NEXTL(rl);
4620 cur = CUR_CHAR(l);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004621 len = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004622 while (IS_CHAR(cur) &&
4623 ((cur != '>') ||
4624 (r != '-') || (q != '-'))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004625 if ((r == '-') && (q == '-') && (len > 1)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004627 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004628 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004629 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004630 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004631 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004632 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004633 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004634 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004635 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004636 if (buf == NULL) {
4637 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004638 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004639 return;
4640 }
4641 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004642 COPY_BUF(ql,buf,len,q);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004643 q = r;
Daniel Veillardcf461992000-03-14 18:30:20 +00004644 ql = rl;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004645 r = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00004646 rl = l;
4647 NEXTL(l);
4648 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004649 if (cur == 0) {
4650 SHRINK;
4651 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004652 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004653 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004654 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004655 buf[len] = 0;
4656 if (!IS_CHAR(cur)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004658 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00004659 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004660 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004661 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004662 ctxt->disableSAX = 1;
4663 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004664 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004665 if (input != ctxt->input) {
4666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4667 ctxt->sax->error(ctxt->userData,
4668"Comment doesn't start and stop in the same entity\n");
4669 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4670 ctxt->wellFormed = 0;
4671 ctxt->disableSAX = 1;
4672 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004673 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004674 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4675 (!ctxt->disableSAX))
Daniel Veillard10a2c651999-12-12 13:03:50 +00004676 ctxt->sax->comment(ctxt->userData, buf);
4677 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004678 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004679 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004680}
4681
Daniel Veillard11e00581998-10-24 18:27:49 +00004682/**
4683 * xmlParsePITarget:
4684 * @ctxt: an XML parser context
4685 *
4686 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00004687 *
4688 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00004689 *
4690 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004691 */
4692
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004693xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004694xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004695 xmlChar *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004696
4697 name = xmlParseName(ctxt);
Daniel Veillard3c558c31999-12-22 11:30:41 +00004698 if ((name != NULL) &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004699 ((name[0] == 'x') || (name[0] == 'X')) &&
4700 ((name[1] == 'm') || (name[1] == 'M')) &&
4701 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillard3c558c31999-12-22 11:30:41 +00004702 int i;
Daniel Veillardcf461992000-03-14 18:30:20 +00004703 if ((name[0] == 'x') && (name[1] == 'm') &&
4704 (name[2] == 'l') && (name[3] == 0)) {
4705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4706 ctxt->sax->error(ctxt->userData,
4707 "XML declaration allowed only at the start of the document\n");
4708 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4709 ctxt->wellFormed = 0;
4710 ctxt->disableSAX = 1;
4711 return(name);
4712 } else if (name[3] == 0) {
4713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4714 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4715 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4716 ctxt->wellFormed = 0;
4717 ctxt->disableSAX = 1;
4718 return(name);
4719 }
Daniel Veillard3c558c31999-12-22 11:30:41 +00004720 for (i = 0;;i++) {
4721 if (xmlW3CPIs[i] == NULL) break;
4722 if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4723 return(name);
4724 }
4725 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4726 ctxt->sax->warning(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004727 "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004728 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004729 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004730 }
4731 return(name);
4732}
4733
Daniel Veillard11e00581998-10-24 18:27:49 +00004734/**
4735 * xmlParsePI:
4736 * @ctxt: an XML parser context
4737 *
4738 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004739 *
4740 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004741 *
4742 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004743 */
4744
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004745void
4746xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004747 xmlChar *buf = NULL;
4748 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004749 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004750 int cur, l;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004751 xmlChar *target;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004752 xmlParserInputState state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004753
Daniel Veillardcf461992000-03-14 18:30:20 +00004754 if ((RAW == '<') && (NXT(1) == '?')) {
4755 xmlParserInputPtr input = ctxt->input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004756 state = ctxt->instate;
4757 ctxt->instate = XML_PARSER_PI;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004758 /*
4759 * this is a Processing Instruction.
4760 */
4761 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004762 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004763
4764 /*
4765 * Parse the target name and check for special support like
4766 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004767 */
4768 target = xmlParsePITarget(ctxt);
4769 if (target != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004770 if ((RAW == '?') && (NXT(1) == '>')) {
4771 if (input != ctxt->input) {
4772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4773 ctxt->sax->error(ctxt->userData,
4774 "PI declaration doesn't start and stop in the same entity\n");
4775 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4776 ctxt->wellFormed = 0;
4777 ctxt->disableSAX = 1;
4778 }
4779 SKIP(2);
4780
4781 /*
4782 * SAX: PI detected.
4783 */
4784 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4785 (ctxt->sax->processingInstruction != NULL))
4786 ctxt->sax->processingInstruction(ctxt->userData,
4787 target, NULL);
4788 ctxt->instate = state;
4789 xmlFree(target);
4790 return;
4791 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004792 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4793 if (buf == NULL) {
4794 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004795 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004796 return;
4797 }
4798 cur = CUR;
4799 if (!IS_BLANK(cur)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801 ctxt->sax->error(ctxt->userData,
4802 "xmlParsePI: PI %s space expected\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004803 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004804 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004805 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004806 }
4807 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004808 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004809 while (IS_CHAR(cur) &&
4810 ((cur != '?') || (NXT(1) != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004811 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004812 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004813 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004814 if (buf == NULL) {
4815 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004816 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004817 return;
4818 }
4819 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004820 COPY_BUF(l,buf,len,cur);
4821 NEXTL(l);
4822 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004823 if (cur == 0) {
4824 SHRINK;
4825 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004826 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004827 }
4828 }
4829 buf[len] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004830 if (cur != '?') {
Daniel Veillard517752b1999-04-05 12:20:10 +00004831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004832 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00004833 "xmlParsePI: PI %s never end ...\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004834 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
Daniel Veillard517752b1999-04-05 12:20:10 +00004835 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004836 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004837 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004838 if (input != ctxt->input) {
4839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4840 ctxt->sax->error(ctxt->userData,
4841 "PI declaration doesn't start and stop in the same entity\n");
4842 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4843 ctxt->wellFormed = 0;
4844 ctxt->disableSAX = 1;
4845 }
Daniel Veillard517752b1999-04-05 12:20:10 +00004846 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004847
Daniel Veillard517752b1999-04-05 12:20:10 +00004848 /*
4849 * SAX: PI detected.
4850 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004851 if ((ctxt->sax) && (!ctxt->disableSAX) &&
Daniel Veillard517752b1999-04-05 12:20:10 +00004852 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004853 ctxt->sax->processingInstruction(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00004854 target, buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004855 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004856 xmlFree(buf);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004857 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004858 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004860 ctxt->sax->error(ctxt->userData,
4861 "xmlParsePI : no target name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004862 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004863 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004864 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004865 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004866 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004867 }
4868}
4869
Daniel Veillard11e00581998-10-24 18:27:49 +00004870/**
4871 * xmlParseNotationDecl:
4872 * @ctxt: an XML parser context
4873 *
4874 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00004875 *
4876 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4877 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004878 * Hence there is actually 3 choices:
4879 * 'PUBLIC' S PubidLiteral
4880 * 'PUBLIC' S PubidLiteral S SystemLiteral
4881 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00004882 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004883 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00004884 */
4885
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004886void
4887xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004888 xmlChar *name;
4889 xmlChar *Pubid;
4890 xmlChar *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004891
Daniel Veillardcf461992000-03-14 18:30:20 +00004892 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004893 (NXT(2) == 'N') && (NXT(3) == 'O') &&
4894 (NXT(4) == 'T') && (NXT(5) == 'A') &&
4895 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00004896 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004897 xmlParserInputPtr input = ctxt->input;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004898 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004899 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004900 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004902 ctxt->sax->error(ctxt->userData,
4903 "Space required after '<!NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004904 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004905 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004906 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004907 return;
4908 }
4909 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004910
4911 name = xmlParseName(ctxt);
4912 if (name == NULL) {
4913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004914 ctxt->sax->error(ctxt->userData,
4915 "NOTATION: Name expected here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004916 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004917 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004918 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004919 return;
4920 }
4921 if (!IS_BLANK(CUR)) {
4922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004923 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004924 "Space required after the NOTATION name'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004925 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004926 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004927 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004928 return;
4929 }
4930 SKIP_BLANKS;
4931
Daniel Veillard260a68f1998-08-13 03:39:55 +00004932 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00004933 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004934 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004935 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004936 SKIP_BLANKS;
4937
Daniel Veillardcf461992000-03-14 18:30:20 +00004938 if (RAW == '>') {
4939 if (input != ctxt->input) {
4940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4941 ctxt->sax->error(ctxt->userData,
4942"Notation declaration doesn't start and stop in the same entity\n");
4943 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4944 ctxt->wellFormed = 0;
4945 ctxt->disableSAX = 1;
4946 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004947 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004948 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4949 (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004950 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004951 } else {
4952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004953 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004954 "'>' required to close NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004955 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004956 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004957 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004958 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00004959 xmlFree(name);
4960 if (Systemid != NULL) xmlFree(Systemid);
4961 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004962 }
4963}
4964
Daniel Veillard11e00581998-10-24 18:27:49 +00004965/**
4966 * xmlParseEntityDecl:
4967 * @ctxt: an XML parser context
4968 *
4969 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004970 *
4971 * [70] EntityDecl ::= GEDecl | PEDecl
4972 *
4973 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4974 *
4975 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4976 *
4977 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4978 *
4979 * [74] PEDef ::= EntityValue | ExternalID
4980 *
4981 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00004982 *
4983 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004984 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004985 */
4986
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004987void
4988xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004989 xmlChar *name = NULL;
4990 xmlChar *value = NULL;
4991 xmlChar *URI = NULL, *literal = NULL;
4992 xmlChar *ndata = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004993 int isParameter = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004994 xmlChar *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004995
Daniel Veillardb05deb71999-08-10 19:04:08 +00004996 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004997 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004998 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4999 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005000 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005001 xmlParserInputPtr input = ctxt->input;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005002 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005003 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005004 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005005 if (!IS_BLANK(CUR)) {
5006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005007 ctxt->sax->error(ctxt->userData,
5008 "Space required after '<!ENTITY'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005009 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005010 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005011 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005012 }
5013 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005014
Daniel Veillardcf461992000-03-14 18:30:20 +00005015 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005016 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005017 if (!IS_BLANK(CUR)) {
5018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005019 ctxt->sax->error(ctxt->userData,
5020 "Space required after '%'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005021 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005022 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005023 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005024 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005025 SKIP_BLANKS;
5026 isParameter = 1;
5027 }
5028
5029 name = xmlParseName(ctxt);
5030 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005032 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005033 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005034 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005035 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005036 return;
5037 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005038 if (!IS_BLANK(CUR)) {
5039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005040 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005041 "Space required after the entity name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005042 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005043 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005044 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005045 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005046 SKIP_BLANKS;
5047
5048 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00005049 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00005050 */
5051 if (isParameter) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005052 if ((RAW == '"') || (RAW == '\''))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005053 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005054 if (value) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005055 if ((ctxt->sax != NULL) &&
5056 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005057 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005058 XML_INTERNAL_PARAMETER_ENTITY,
5059 NULL, NULL, value);
5060 }
5061 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005062 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00005063 if ((URI == NULL) && (literal == NULL)) {
5064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5065 ctxt->sax->error(ctxt->userData,
5066 "Entity value required\n");
5067 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5068 ctxt->wellFormed = 0;
5069 ctxt->disableSAX = 1;
5070 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005071 if (URI) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00005072 xmlURIPtr uri;
5073
5074 uri = xmlParseURI((const char *) URI);
5075 if (uri == NULL) {
5076 if ((ctxt->sax != NULL) &&
5077 (!ctxt->disableSAX) &&
5078 (ctxt->sax->error != NULL))
5079 ctxt->sax->error(ctxt->userData,
5080 "Invalid URI: %s\n", URI);
5081 ctxt->wellFormed = 0;
5082 ctxt->errNo = XML_ERR_INVALID_URI;
5083 } else {
5084 if (uri->fragment != NULL) {
5085 if ((ctxt->sax != NULL) &&
5086 (!ctxt->disableSAX) &&
5087 (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "Fragment not allowed: %s\n", URI);
5090 ctxt->wellFormed = 0;
5091 ctxt->errNo = XML_ERR_URI_FRAGMENT;
5092 } else {
5093 if ((ctxt->sax != NULL) &&
5094 (!ctxt->disableSAX) &&
5095 (ctxt->sax->entityDecl != NULL))
5096 ctxt->sax->entityDecl(ctxt->userData, name,
5097 XML_EXTERNAL_PARAMETER_ENTITY,
5098 literal, URI, NULL);
5099 }
5100 xmlFreeURI(uri);
5101 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005102 }
5103 }
5104 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005105 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005106 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillardcf461992000-03-14 18:30:20 +00005107 if ((ctxt->sax != NULL) &&
5108 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005109 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005110 XML_INTERNAL_GENERAL_ENTITY,
5111 NULL, NULL, value);
5112 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005113 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00005114 if ((URI == NULL) && (literal == NULL)) {
5115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5116 ctxt->sax->error(ctxt->userData,
5117 "Entity value required\n");
5118 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5119 ctxt->wellFormed = 0;
5120 ctxt->disableSAX = 1;
5121 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00005122 if (URI) {
5123 xmlURIPtr uri;
5124
5125 uri = xmlParseURI((const char *)URI);
5126 if (uri == NULL) {
5127 if ((ctxt->sax != NULL) &&
5128 (!ctxt->disableSAX) &&
5129 (ctxt->sax->error != NULL))
5130 ctxt->sax->error(ctxt->userData,
5131 "Invalid URI: %s\n", URI);
5132 ctxt->wellFormed = 0;
5133 ctxt->errNo = XML_ERR_INVALID_URI;
5134 } else {
5135 if (uri->fragment != NULL) {
5136 if ((ctxt->sax != NULL) &&
5137 (!ctxt->disableSAX) &&
5138 (ctxt->sax->error != NULL))
5139 ctxt->sax->error(ctxt->userData,
5140 "Fragment not allowed: %s\n", URI);
5141 ctxt->wellFormed = 0;
5142 ctxt->errNo = XML_ERR_URI_FRAGMENT;
5143 }
5144 xmlFreeURI(uri);
5145 }
5146 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005147 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005149 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005150 "Space required before 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005151 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005152 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005153 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005154 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005155 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005156 if ((RAW == 'N') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005157 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5158 (NXT(4) == 'A')) {
5159 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005160 if (!IS_BLANK(CUR)) {
5161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005162 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005163 "Space required after 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005164 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005165 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005166 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005167 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005168 SKIP_BLANKS;
5169 ndata = xmlParseName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00005170 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00005171 (ctxt->sax->unparsedEntityDecl != NULL))
5172 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005173 literal, URI, ndata);
5174 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005175 if ((ctxt->sax != NULL) &&
5176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005177 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005178 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5179 literal, URI, NULL);
5180 }
5181 }
5182 }
5183 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005184 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005186 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005187 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005188 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005189 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005190 ctxt->disableSAX = 1;
5191 } else {
5192 if (input != ctxt->input) {
5193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5194 ctxt->sax->error(ctxt->userData,
5195"Entity declaration doesn't start and stop in the same entity\n");
5196 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5197 ctxt->wellFormed = 0;
5198 ctxt->disableSAX = 1;
5199 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005200 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005201 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005202 if (orig != NULL) {
5203 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005204 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00005205 */
5206 xmlEntityPtr cur = NULL;
5207
Daniel Veillardb05deb71999-08-10 19:04:08 +00005208 if (isParameter) {
5209 if ((ctxt->sax != NULL) &&
5210 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00005211 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005212 } else {
5213 if ((ctxt->sax != NULL) &&
5214 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00005215 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005216 }
5217 if (cur != NULL) {
5218 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005219 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005220 else
5221 cur->orig = orig;
5222 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00005223 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005224 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005225 if (name != NULL) xmlFree(name);
5226 if (value != NULL) xmlFree(value);
5227 if (URI != NULL) xmlFree(URI);
5228 if (literal != NULL) xmlFree(literal);
5229 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005230 }
5231}
5232
Daniel Veillard11e00581998-10-24 18:27:49 +00005233/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005234 * xmlParseDefaultDecl:
5235 * @ctxt: an XML parser context
5236 * @value: Receive a possible fixed default value for the attribute
5237 *
5238 * Parse an attribute default declaration
5239 *
5240 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5241 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005242 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005243 * if the default declaration is the keyword #REQUIRED, then the
5244 * attribute must be specified for all elements of the type in the
5245 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005246 *
5247 * [ VC: Attribute Default Legal ]
5248 * The declared default value must meet the lexical constraints of
5249 * the declared attribute type c.f. xmlValidateAttributeDecl()
5250 *
5251 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005252 * if an attribute has a default value declared with the #FIXED
5253 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005254 *
5255 * [ WFC: No < in Attribute Values ]
5256 * handled in xmlParseAttValue()
5257 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005258 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5259 * or XML_ATTRIBUTE_FIXED.
5260 */
5261
5262int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005263xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005264 int val;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005265 xmlChar *ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005266
5267 *value = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00005268 if ((RAW == '#') && (NXT(1) == 'R') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005269 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
5270 (NXT(4) == 'U') && (NXT(5) == 'I') &&
5271 (NXT(6) == 'R') && (NXT(7) == 'E') &&
5272 (NXT(8) == 'D')) {
5273 SKIP(9);
5274 return(XML_ATTRIBUTE_REQUIRED);
5275 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005276 if ((RAW == '#') && (NXT(1) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005277 (NXT(2) == 'M') && (NXT(3) == 'P') &&
5278 (NXT(4) == 'L') && (NXT(5) == 'I') &&
5279 (NXT(6) == 'E') && (NXT(7) == 'D')) {
5280 SKIP(8);
5281 return(XML_ATTRIBUTE_IMPLIED);
5282 }
5283 val = XML_ATTRIBUTE_NONE;
Daniel Veillardcf461992000-03-14 18:30:20 +00005284 if ((RAW == '#') && (NXT(1) == 'F') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005285 (NXT(2) == 'I') && (NXT(3) == 'X') &&
5286 (NXT(4) == 'E') && (NXT(5) == 'D')) {
5287 SKIP(6);
5288 val = XML_ATTRIBUTE_FIXED;
5289 if (!IS_BLANK(CUR)) {
5290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005291 ctxt->sax->error(ctxt->userData,
5292 "Space required after '#FIXED'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005293 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005294 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005295 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005296 }
5297 SKIP_BLANKS;
5298 }
5299 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005300 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005301 if (ret == NULL) {
5302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005303 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005304 "Attribute default value declaration error\n");
5305 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005306 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005307 } else
5308 *value = ret;
5309 return(val);
5310}
5311
5312/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00005313 * xmlParseNotationType:
5314 * @ctxt: an XML parser context
5315 *
5316 * parse an Notation attribute type.
5317 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005318 * Note: the leading 'NOTATION' S part has already being parsed...
5319 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005320 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5321 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005322 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005323 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00005324 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00005325 *
5326 * Returns: the notation attribute tree built while parsing
5327 */
5328
5329xmlEnumerationPtr
5330xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005331 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005332 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5333
Daniel Veillardcf461992000-03-14 18:30:20 +00005334 if (RAW != '(') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005336 ctxt->sax->error(ctxt->userData,
5337 "'(' required to start 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005338 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005339 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005340 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005341 return(NULL);
5342 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005343 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005344 do {
5345 NEXT;
5346 SKIP_BLANKS;
5347 name = xmlParseName(ctxt);
5348 if (name == NULL) {
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005350 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005351 "Name expected in NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005352 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005353 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005354 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005355 return(ret);
5356 }
5357 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005358 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005359 if (cur == NULL) return(ret);
5360 if (last == NULL) ret = last = cur;
5361 else {
5362 last->next = cur;
5363 last = cur;
5364 }
5365 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005366 } while (RAW == '|');
5367 if (RAW != ')') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005369 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005370 "')' required to finish NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005371 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005372 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005373 ctxt->disableSAX = 1;
5374 if ((last != NULL) && (last != ret))
5375 xmlFreeEnumeration(last);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005376 return(ret);
5377 }
5378 NEXT;
5379 return(ret);
5380}
5381
5382/**
5383 * xmlParseEnumerationType:
5384 * @ctxt: an XML parser context
5385 *
5386 * parse an Enumeration attribute type.
5387 *
5388 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5389 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005390 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005391 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00005392 * the declaration
5393 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005394 * Returns: the enumeration attribute tree built while parsing
5395 */
5396
5397xmlEnumerationPtr
5398xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005399 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005400 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5401
Daniel Veillardcf461992000-03-14 18:30:20 +00005402 if (RAW != '(') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005404 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005405 "'(' required to start ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005406 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005407 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005408 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005409 return(NULL);
5410 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005411 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005412 do {
5413 NEXT;
5414 SKIP_BLANKS;
5415 name = xmlParseNmtoken(ctxt);
5416 if (name == NULL) {
5417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005418 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005419 "NmToken expected in ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005420 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005421 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005422 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005423 return(ret);
5424 }
5425 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005426 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005427 if (cur == NULL) return(ret);
5428 if (last == NULL) ret = last = cur;
5429 else {
5430 last->next = cur;
5431 last = cur;
5432 }
5433 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005434 } while (RAW == '|');
5435 if (RAW != ')') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005437 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005438 "')' required to finish ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005439 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005440 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005441 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005442 return(ret);
5443 }
5444 NEXT;
5445 return(ret);
5446}
5447
5448/**
Daniel Veillard11e00581998-10-24 18:27:49 +00005449 * xmlParseEnumeratedType:
5450 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00005451 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00005452 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005453 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005454 *
5455 * [57] EnumeratedType ::= NotationType | Enumeration
5456 *
5457 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5458 *
Daniel Veillard11e00581998-10-24 18:27:49 +00005459 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005460 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00005461 */
5462
Daniel Veillard1e346af1999-02-22 10:33:01 +00005463int
5464xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005465 if ((RAW == 'N') && (NXT(1) == 'O') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005466 (NXT(2) == 'T') && (NXT(3) == 'A') &&
5467 (NXT(4) == 'T') && (NXT(5) == 'I') &&
5468 (NXT(6) == 'O') && (NXT(7) == 'N')) {
5469 SKIP(8);
5470 if (!IS_BLANK(CUR)) {
5471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005472 ctxt->sax->error(ctxt->userData,
5473 "Space required after 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005474 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005475 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005476 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005477 return(0);
5478 }
5479 SKIP_BLANKS;
5480 *tree = xmlParseNotationType(ctxt);
5481 if (*tree == NULL) return(0);
5482 return(XML_ATTRIBUTE_NOTATION);
5483 }
5484 *tree = xmlParseEnumerationType(ctxt);
5485 if (*tree == NULL) return(0);
5486 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005487}
5488
Daniel Veillard11e00581998-10-24 18:27:49 +00005489/**
5490 * xmlParseAttributeType:
5491 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00005492 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00005493 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005494 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00005495 *
5496 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5497 *
5498 * [55] StringType ::= 'CDATA'
5499 *
5500 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5501 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00005502 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005503 * Validity constraints for attribute values syntax are checked in
5504 * xmlValidateAttributeValue()
5505 *
5506 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005507 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00005508 * appear more than once in an XML document as a value of this type;
5509 * i.e., ID values must uniquely identify the elements which bear them.
5510 *
5511 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005512 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005513 *
5514 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005515 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005516 *
5517 * [ VC: IDREF ]
5518 * Values of type IDREF must match the Name production, and values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005519 * of type IDREFS must match Names; each IDREF Name must match the value
Daniel Veillardb96e6431999-08-29 21:02:19 +00005520 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00005521 * values must match the value of some ID attribute.
5522 *
5523 * [ VC: Entity Name ]
5524 * Values of type ENTITY must match the Name production, values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005525 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardb96e6431999-08-29 21:02:19 +00005526 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005527 *
5528 * [ VC: Name Token ]
5529 * Values of type NMTOKEN must match the Nmtoken production; values
5530 * of type NMTOKENS must match Nmtokens.
5531 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005532 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00005533 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005534int
Daniel Veillard1e346af1999-02-22 10:33:01 +00005535xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005536 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005537 if ((RAW == 'C') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005538 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5539 (NXT(4) == 'A')) {
5540 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005541 return(XML_ATTRIBUTE_CDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00005542 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005543 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005544 (NXT(4) == 'F') && (NXT(5) == 'S')) {
5545 SKIP(6);
5546 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillardcf461992000-03-14 18:30:20 +00005547 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005548 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005549 (NXT(4) == 'F')) {
5550 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005551 return(XML_ATTRIBUTE_IDREF);
Daniel Veillardcf461992000-03-14 18:30:20 +00005552 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005553 SKIP(2);
5554 return(XML_ATTRIBUTE_ID);
Daniel Veillardcf461992000-03-14 18:30:20 +00005555 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005556 (NXT(2) == 'T') && (NXT(3) == 'I') &&
5557 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5558 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005559 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillardcf461992000-03-14 18:30:20 +00005560 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005561 (NXT(2) == 'T') && (NXT(3) == 'I') &&
5562 (NXT(4) == 'T') && (NXT(5) == 'I') &&
5563 (NXT(6) == 'E') && (NXT(7) == 'S')) {
5564 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005565 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillardcf461992000-03-14 18:30:20 +00005566 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005567 (NXT(2) == 'T') && (NXT(3) == 'O') &&
5568 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005569 (NXT(6) == 'N') && (NXT(7) == 'S')) {
5570 SKIP(8);
5571 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillardcf461992000-03-14 18:30:20 +00005572 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005573 (NXT(2) == 'T') && (NXT(3) == 'O') &&
5574 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005575 (NXT(6) == 'N')) {
5576 SKIP(7);
5577 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005578 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00005579 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00005580}
5581
Daniel Veillard11e00581998-10-24 18:27:49 +00005582/**
5583 * xmlParseAttributeListDecl:
5584 * @ctxt: an XML parser context
5585 *
5586 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00005587 *
5588 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5589 *
5590 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00005591 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005592 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005593void
5594xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005595 xmlChar *elemName;
5596 xmlChar *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005597 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005598
Daniel Veillardcf461992000-03-14 18:30:20 +00005599 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005600 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5601 (NXT(4) == 'T') && (NXT(5) == 'L') &&
5602 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005603 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005604 xmlParserInputPtr input = ctxt->input;
5605
Daniel Veillard260a68f1998-08-13 03:39:55 +00005606 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005607 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005609 ctxt->sax->error(ctxt->userData,
5610 "Space required after '<!ATTLIST'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005611 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005612 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005613 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005614 }
5615 SKIP_BLANKS;
5616 elemName = xmlParseName(ctxt);
5617 if (elemName == NULL) {
5618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005619 ctxt->sax->error(ctxt->userData,
5620 "ATTLIST: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005621 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005622 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005623 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005624 return;
5625 }
5626 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005627 while (RAW != '>') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005628 const xmlChar *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005629 int type;
5630 int def;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005631 xmlChar *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005632
Daniel Veillardb05deb71999-08-10 19:04:08 +00005633 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005634 attrName = xmlParseName(ctxt);
5635 if (attrName == NULL) {
5636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005637 ctxt->sax->error(ctxt->userData,
5638 "ATTLIST: no name for Attribute\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005639 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005640 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005641 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005642 break;
5643 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005644 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005645 if (!IS_BLANK(CUR)) {
5646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005647 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005648 "Space required after the attribute name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005649 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005650 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005651 ctxt->disableSAX = 1;
5652 if (attrName != NULL)
5653 xmlFree(attrName);
5654 if (defaultValue != NULL)
5655 xmlFree(defaultValue);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005656 break;
5657 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005658 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005659
Daniel Veillard1e346af1999-02-22 10:33:01 +00005660 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillardcf461992000-03-14 18:30:20 +00005661 if (type <= 0) {
5662 if (attrName != NULL)
5663 xmlFree(attrName);
5664 if (defaultValue != NULL)
5665 xmlFree(defaultValue);
5666 break;
5667 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005668
Daniel Veillardb05deb71999-08-10 19:04:08 +00005669 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005670 if (!IS_BLANK(CUR)) {
5671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005672 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005673 "Space required after the attribute type\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005674 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005675 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005676 ctxt->disableSAX = 1;
5677 if (attrName != NULL)
5678 xmlFree(attrName);
5679 if (defaultValue != NULL)
5680 xmlFree(defaultValue);
5681 if (tree != NULL)
5682 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005683 break;
5684 }
5685 SKIP_BLANKS;
5686
5687 def = xmlParseDefaultDecl(ctxt, &defaultValue);
Daniel Veillardcf461992000-03-14 18:30:20 +00005688 if (def <= 0) {
5689 if (attrName != NULL)
5690 xmlFree(attrName);
5691 if (defaultValue != NULL)
5692 xmlFree(defaultValue);
5693 if (tree != NULL)
5694 xmlFreeEnumeration(tree);
5695 break;
5696 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005697
Daniel Veillardb05deb71999-08-10 19:04:08 +00005698 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005699 if (RAW != '>') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005700 if (!IS_BLANK(CUR)) {
5701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005702 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005703 "Space required after the attribute default value\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005704 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005705 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005706 ctxt->disableSAX = 1;
5707 if (attrName != NULL)
5708 xmlFree(attrName);
5709 if (defaultValue != NULL)
5710 xmlFree(defaultValue);
5711 if (tree != NULL)
5712 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005713 break;
5714 }
5715 SKIP_BLANKS;
5716 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005717 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005719 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005720 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005721 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardcf461992000-03-14 18:30:20 +00005722 if (attrName != NULL)
5723 xmlFree(attrName);
5724 if (defaultValue != NULL)
5725 xmlFree(defaultValue);
5726 if (tree != NULL)
5727 xmlFreeEnumeration(tree);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005728 break;
5729 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005730 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5731 (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005732 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005733 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005734 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005735 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005736 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005737 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005738 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005739 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005740 if (RAW == '>') {
5741 if (input != ctxt->input) {
5742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5743 ctxt->sax->error(ctxt->userData,
5744"Attribute list declaration doesn't start and stop in the same entity\n");
5745 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5746 ctxt->wellFormed = 0;
5747 ctxt->disableSAX = 1;
5748 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005749 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005750 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005751
Daniel Veillard6454aec1999-09-02 22:04:43 +00005752 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005753 }
5754}
5755
Daniel Veillard11e00581998-10-24 18:27:49 +00005756/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005757 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00005758 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005759 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005760 * parse the declaration for a Mixed Element content
5761 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00005762 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005763 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5764 * '(' S? '#PCDATA' S? ')'
5765 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005766 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5767 *
5768 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005769 * The same name must not appear more than once in a single
5770 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005771 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005772 * returns: the list of the xmlElementContentPtr describing the element choices
5773 */
5774xmlElementContentPtr
5775xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00005776 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005777 xmlChar *elem = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005778
Daniel Veillardb05deb71999-08-10 19:04:08 +00005779 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005780 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005781 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5782 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5783 (NXT(6) == 'A')) {
5784 SKIP(7);
5785 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005786 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005787 if (RAW == ')') {
5788 ctxt->entity = ctxt->input;
Daniel Veillard3b9def11999-01-31 22:15:06 +00005789 NEXT;
5790 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00005791 if (RAW == '*') {
Daniel Veillardf600e251999-12-18 15:32:46 +00005792 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5793 NEXT;
5794 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00005795 return(ret);
5796 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005797 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005798 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5799 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005800 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005801 while (RAW == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00005802 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005803 if (elem == NULL) {
5804 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5805 if (ret == NULL) return(NULL);
5806 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00005807 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005808 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00005809 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5810 if (n == NULL) return(NULL);
5811 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5812 cur->c2 = n;
5813 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00005814 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005815 }
5816 SKIP_BLANKS;
5817 elem = xmlParseName(ctxt);
5818 if (elem == NULL) {
5819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005820 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005821 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005822 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005823 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005824 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005825 xmlFreeElementContent(cur);
5826 return(NULL);
5827 }
5828 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005829 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005830 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005831 if ((RAW == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005832 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005833 cur->c2 = xmlNewElementContent(elem,
5834 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005835 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005836 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005837 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005838 ctxt->entity = ctxt->input;
Daniel Veillard1899e851999-02-01 12:18:54 +00005839 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005840 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00005841 if (elem != NULL) xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005843 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00005844 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005845 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005846 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005847 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005848 xmlFreeElementContent(ret);
5849 return(NULL);
5850 }
5851
5852 } else {
5853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005854 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005855 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005856 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005857 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005858 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005859 }
5860 return(ret);
5861}
5862
5863/**
5864 * xmlParseElementChildrenContentDecl:
5865 * @ctxt: an XML parser context
5866 *
5867 * parse the declaration for a Mixed Element content
5868 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5869 *
5870 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005871 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5872 *
5873 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5874 *
5875 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5876 *
5877 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5878 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005879 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5880 * TODO Parameter-entity replacement text must be properly nested
5881 * with parenthetized groups. That is to say, if either of the
5882 * opening or closing parentheses in a choice, seq, or Mixed
5883 * construct is contained in the replacement text for a parameter
5884 * entity, both must be contained in the same replacement text. For
5885 * interoperability, if a parameter-entity reference appears in a
5886 * choice, seq, or Mixed construct, its replacement text should not
5887 * be empty, and neither the first nor last non-blank character of
5888 * the replacement text should be a connector (| or ,).
5889 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005890 * returns: the tree of xmlElementContentPtr describing the element
5891 * hierarchy.
5892 */
5893xmlElementContentPtr
5894xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
5895 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005896 xmlChar *elem;
5897 xmlChar type = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005898
5899 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005900 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005901 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005902 /* Recurse on first child */
5903 NEXT;
5904 SKIP_BLANKS;
5905 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5906 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005907 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005908 } else {
5909 elem = xmlParseName(ctxt);
5910 if (elem == NULL) {
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005912 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005913 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005914 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005915 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005916 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005917 return(NULL);
5918 }
5919 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005920 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005921 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005922 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005923 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005924 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005925 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005926 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005927 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005928 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005929 NEXT;
5930 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005931 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005932 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005933 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005934 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005935 }
5936 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005937 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005938 while (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005939 /*
5940 * Each loop we parse one separator and one element.
5941 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005942 if (RAW == ',') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005943 if (type == 0) type = CUR;
5944
5945 /*
5946 * Detect "Name | Name , Name" error
5947 */
5948 else if (type != CUR) {
5949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005950 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005951 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5952 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005953 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005954 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005955 ctxt->disableSAX = 1;
5956 if ((op != NULL) && (op != ret))
5957 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00005958 if ((last != NULL) && (last != ret) &&
5959 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00005960 xmlFreeElementContent(last);
5961 if (ret != NULL)
5962 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005963 return(NULL);
5964 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005965 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005966
5967 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5968 if (op == NULL) {
5969 xmlFreeElementContent(ret);
5970 return(NULL);
5971 }
5972 if (last == NULL) {
5973 op->c1 = ret;
5974 ret = cur = op;
5975 } else {
5976 cur->c2 = op;
5977 op->c1 = last;
5978 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00005979 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005980 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005981 } else if (RAW == '|') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005982 if (type == 0) type = CUR;
5983
5984 /*
5985 * Detect "Name , Name | Name" error
5986 */
5987 else if (type != CUR) {
5988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005989 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005990 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5991 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005992 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005993 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005994 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00005995 if ((op != NULL) && (op != ret) && (op != last))
Daniel Veillardcf461992000-03-14 18:30:20 +00005996 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00005997 if ((last != NULL) && (last != ret) &&
5998 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00005999 xmlFreeElementContent(last);
6000 if (ret != NULL)
6001 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006002 return(NULL);
6003 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006004 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006005
6006 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
6007 if (op == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006008 if ((op != NULL) && (op != ret))
6009 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006010 if ((last != NULL) && (last != ret) &&
6011 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006012 xmlFreeElementContent(last);
6013 if (ret != NULL)
6014 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006015 return(NULL);
6016 }
6017 if (last == NULL) {
6018 op->c1 = ret;
6019 ret = cur = op;
6020 } else {
6021 cur->c2 = op;
6022 op->c1 = last;
6023 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00006024 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006025 }
6026 } else {
6027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006028 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006029 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
6030 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006031 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006032 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
Daniel Veillardcf461992000-03-14 18:30:20 +00006033 if ((op != NULL) && (op != ret))
6034 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006035 if ((last != NULL) && (last != ret) &&
6036 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006037 xmlFreeElementContent(last);
6038 if (ret != NULL)
6039 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006040 return(NULL);
6041 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006042 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006043 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006044 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006045 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006046 /* Recurse on second child */
6047 NEXT;
6048 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00006049 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006050 SKIP_BLANKS;
6051 } else {
6052 elem = xmlParseName(ctxt);
6053 if (elem == NULL) {
6054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006055 ctxt->sax->error(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006056 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006057 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006058 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006059 ctxt->disableSAX = 1;
6060 if ((op != NULL) && (op != ret))
6061 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006062 if ((last != NULL) && (last != ret) &&
6063 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006064 xmlFreeElementContent(last);
6065 if (ret != NULL)
6066 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006067 return(NULL);
6068 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006069 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00006070 xmlFree(elem);
Daniel Veillardcf461992000-03-14 18:30:20 +00006071 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006072 last->ocur = XML_ELEMENT_CONTENT_OPT;
6073 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006074 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006075 last->ocur = XML_ELEMENT_CONTENT_MULT;
6076 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006077 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006078 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6079 NEXT;
6080 } else {
6081 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6082 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006083 }
6084 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006085 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006086 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006087 if ((cur != NULL) && (last != NULL)) {
6088 cur->c2 = last;
6089 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006090 ctxt->entity = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006091 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006092 if (RAW == '?') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006093 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6094 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006095 } else if (RAW == '*') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006096 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6097 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006098 } else if (RAW == '+') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006099 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6100 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006101 }
6102 return(ret);
6103}
6104
6105/**
6106 * xmlParseElementContentDecl:
6107 * @ctxt: an XML parser context
6108 * @name: the name of the element being defined.
6109 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00006110 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006111 * parse the declaration for an Element content either Mixed or Children,
6112 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6113 *
6114 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00006115 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006116 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00006117 */
6118
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006119int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006120xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006121 xmlElementContentPtr *result) {
6122
6123 xmlElementContentPtr tree = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00006124 xmlParserInputPtr input = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006125 int res;
6126
6127 *result = NULL;
6128
Daniel Veillardcf461992000-03-14 18:30:20 +00006129 if (RAW != '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006131 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006132 "xmlParseElementContentDecl : '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006133 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006134 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006135 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006136 return(-1);
6137 }
6138 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006139 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006140 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006141 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006142 (NXT(2) == 'C') && (NXT(3) == 'D') &&
6143 (NXT(4) == 'A') && (NXT(5) == 'T') &&
6144 (NXT(6) == 'A')) {
6145 tree = xmlParseElementMixedContentDecl(ctxt);
6146 res = XML_ELEMENT_TYPE_MIXED;
6147 } else {
6148 tree = xmlParseElementChildrenContentDecl(ctxt);
6149 res = XML_ELEMENT_TYPE_ELEMENT;
6150 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006151 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
6152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153 ctxt->sax->error(ctxt->userData,
6154"Element content declaration doesn't start and stop in the same entity\n");
6155 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6156 ctxt->wellFormed = 0;
6157 ctxt->disableSAX = 1;
6158 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006159 SKIP_BLANKS;
6160 /****************************
Daniel Veillardcf461992000-03-14 18:30:20 +00006161 if (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006163 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006164 "xmlParseElementContentDecl : ')' expected\n");
6165 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006166 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006167 return(-1);
6168 }
6169 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00006170 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006171 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006172}
6173
Daniel Veillard11e00581998-10-24 18:27:49 +00006174/**
6175 * xmlParseElementDecl:
6176 * @ctxt: an XML parser context
6177 *
6178 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006179 *
6180 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6181 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006182 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006183 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00006184 *
6185 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00006186 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006187int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006188xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006189 xmlChar *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006190 int ret = -1;
6191 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006192
Daniel Veillardb05deb71999-08-10 19:04:08 +00006193 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006194 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006195 (NXT(2) == 'E') && (NXT(3) == 'L') &&
6196 (NXT(4) == 'E') && (NXT(5) == 'M') &&
6197 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006198 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006199 xmlParserInputPtr input = ctxt->input;
6200
Daniel Veillard260a68f1998-08-13 03:39:55 +00006201 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006202 if (!IS_BLANK(CUR)) {
6203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006204 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006205 "Space required after 'ELEMENT'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006206 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006207 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006208 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006209 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006210 SKIP_BLANKS;
6211 name = xmlParseName(ctxt);
6212 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006214 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006215 "xmlParseElementDecl: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006216 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006217 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006218 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006219 return(-1);
6220 }
6221 if (!IS_BLANK(CUR)) {
6222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006223 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006224 "Space required after the element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006225 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006226 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006227 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006228 }
6229 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006230 if ((RAW == 'E') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006231 (NXT(2) == 'P') && (NXT(3) == 'T') &&
6232 (NXT(4) == 'Y')) {
6233 SKIP(5);
6234 /*
6235 * Element must always be empty.
6236 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006237 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillardcf461992000-03-14 18:30:20 +00006238 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006239 (NXT(2) == 'Y')) {
6240 SKIP(3);
6241 /*
6242 * Element is a generic container.
6243 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006244 ret = XML_ELEMENT_TYPE_ANY;
Daniel Veillardcf461992000-03-14 18:30:20 +00006245 } else if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006246 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006247 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006248 /*
6249 * [ WFC: PEs in Internal Subset ] error handling.
6250 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006251 if ((RAW == '%') && (ctxt->external == 0) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00006252 (ctxt->inputNr == 1)) {
6253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6254 ctxt->sax->error(ctxt->userData,
6255 "PEReference: forbidden within markup decl in internal subset\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006256 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006257 } else {
6258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6259 ctxt->sax->error(ctxt->userData,
6260 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006261 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006262 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006263 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006264 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00006265 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006266 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006267 }
Daniel Veillard686d6b62000-01-03 11:08:02 +00006268
Daniel Veillard260a68f1998-08-13 03:39:55 +00006269 SKIP_BLANKS;
Daniel Veillard686d6b62000-01-03 11:08:02 +00006270 /*
6271 * Pop-up of finished entities.
6272 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006273 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard686d6b62000-01-03 11:08:02 +00006274 xmlPopInput(ctxt);
6275 SKIP_BLANKS;
6276
Daniel Veillardcf461992000-03-14 18:30:20 +00006277 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006279 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00006280 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006281 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006282 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006283 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006284 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006285 if (input != ctxt->input) {
6286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6287 ctxt->sax->error(ctxt->userData,
6288"Element declaration doesn't start and stop in the same entity\n");
6289 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6290 ctxt->wellFormed = 0;
6291 ctxt->disableSAX = 1;
6292 }
6293
Daniel Veillard260a68f1998-08-13 03:39:55 +00006294 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006295 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6296 (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006297 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6298 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006299 }
Daniel Veillard14fff061999-06-22 21:49:07 +00006300 if (content != NULL) {
6301 xmlFreeElementContent(content);
6302 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006303 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00006304 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006305 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006306 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006307 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006308}
6309
Daniel Veillard11e00581998-10-24 18:27:49 +00006310/**
6311 * xmlParseMarkupDecl:
6312 * @ctxt: an XML parser context
6313 *
6314 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00006315 *
6316 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6317 * NotationDecl | PI | Comment
6318 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006319 * [ VC: Proper Declaration/PE Nesting ]
6320 * TODO Parameter-entity replacement text must be properly nested with
6321 * markup declarations. That is to say, if either the first character
6322 * or the last character of a markup declaration (markupdecl above) is
6323 * contained in the replacement text for a parameter-entity reference,
6324 * both must be contained in the same replacement text.
6325 *
6326 * [ WFC: PEs in Internal Subset ]
6327 * In the internal DTD subset, parameter-entity references can occur
6328 * only where markup declarations can occur, not within markup declarations.
6329 * (This does not apply to references that occur in external parameter
6330 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006331 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006332void
6333xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006334 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006335 xmlParseElementDecl(ctxt);
6336 xmlParseAttributeListDecl(ctxt);
6337 xmlParseEntityDecl(ctxt);
6338 xmlParseNotationDecl(ctxt);
6339 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00006340 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006341 /*
6342 * This is only for internal subset. On external entities,
6343 * the replacement is done before parsing stage
6344 */
6345 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6346 xmlParsePEReference(ctxt);
6347 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006348}
6349
Daniel Veillard11e00581998-10-24 18:27:49 +00006350/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00006351 * xmlParseTextDecl:
6352 * @ctxt: an XML parser context
6353 *
6354 * parse an XML declaration header for external entities
6355 *
6356 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6357 *
Daniel Veillardcf461992000-03-14 18:30:20 +00006358 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
Daniel Veillard011b63c1999-06-02 17:44:04 +00006359 */
6360
Daniel Veillardcf461992000-03-14 18:30:20 +00006361void
Daniel Veillard011b63c1999-06-02 17:44:04 +00006362xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006363 xmlChar *version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006364
6365 /*
6366 * We know that '<?xml' is here.
6367 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006368 if ((RAW == '<') && (NXT(1) == '?') &&
6369 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6370 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6371 SKIP(5);
6372 } else {
6373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6374 ctxt->sax->error(ctxt->userData,
6375 "Text declaration '<?xml' required\n");
6376 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6377 ctxt->wellFormed = 0;
6378 ctxt->disableSAX = 1;
6379
6380 return;
6381 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006382
6383 if (!IS_BLANK(CUR)) {
6384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006385 ctxt->sax->error(ctxt->userData,
6386 "Space needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006387 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006388 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006389 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006390 }
6391 SKIP_BLANKS;
6392
6393 /*
6394 * We may have the VersionInfo here.
6395 */
6396 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006397 if (version == NULL)
6398 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00006399 ctxt->input->version = version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006400
6401 /*
6402 * We must have the encoding declaration
6403 */
6404 if (!IS_BLANK(CUR)) {
6405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006406 ctxt->sax->error(ctxt->userData, "Space needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006407 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006408 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006409 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006410 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006411 xmlParseEncodingDecl(ctxt);
6412 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6413 /*
6414 * The XML REC instructs us to stop parsing right here
6415 */
6416 return;
6417 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006418
6419 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006420 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006421 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006422 } else if (RAW == '>') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006423 /* Deprecated old WD ... */
6424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006425 ctxt->sax->error(ctxt->userData,
6426 "XML declaration must end-up with '?>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006427 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006428 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006429 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006430 NEXT;
6431 } else {
6432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006433 ctxt->sax->error(ctxt->userData,
6434 "parsing XML declaration: '?>' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006435 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006436 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006437 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006438 MOVETO_ENDTAG(CUR_PTR);
6439 NEXT;
6440 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006441}
6442
6443/*
6444 * xmlParseConditionalSections
6445 * @ctxt: an XML parser context
6446 *
6447 * TODO : Conditionnal section are not yet supported !
6448 *
6449 * [61] conditionalSect ::= includeSect | ignoreSect
6450 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6451 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6452 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6453 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6454 */
6455
6456void
6457xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006458 SKIP(3);
6459 SKIP_BLANKS;
6460 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6461 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6462 (NXT(6) == 'E')) {
6463 SKIP(7);
6464 SKIP_BLANKS;
6465 if (RAW != '[') {
6466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6467 ctxt->sax->error(ctxt->userData,
6468 "XML conditional section '[' expected\n");
6469 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6470 ctxt->wellFormed = 0;
6471 ctxt->disableSAX = 1;
6472 } else {
6473 NEXT;
6474 }
6475 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6476 (NXT(2) != '>'))) {
6477 const xmlChar *check = CUR_PTR;
6478 int cons = ctxt->input->consumed;
6479 int tok = ctxt->token;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006480
Daniel Veillardcf461992000-03-14 18:30:20 +00006481 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6482 xmlParseConditionalSections(ctxt);
6483 } else if (IS_BLANK(CUR)) {
6484 NEXT;
6485 } else if (RAW == '%') {
6486 xmlParsePEReference(ctxt);
6487 } else
6488 xmlParseMarkupDecl(ctxt);
6489
6490 /*
6491 * Pop-up of finished entities.
6492 */
6493 while ((RAW == 0) && (ctxt->inputNr > 1))
6494 xmlPopInput(ctxt);
6495
6496 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6497 (tok == ctxt->token)) {
6498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6499 ctxt->sax->error(ctxt->userData,
6500 "Content error in the external subset\n");
6501 ctxt->wellFormed = 0;
6502 ctxt->disableSAX = 1;
6503 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6504 break;
6505 }
6506 }
6507 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6508 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
6509 int state;
6510
6511 SKIP(6);
6512 SKIP_BLANKS;
6513 if (RAW != '[') {
6514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6515 ctxt->sax->error(ctxt->userData,
6516 "XML conditional section '[' expected\n");
6517 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6518 ctxt->wellFormed = 0;
6519 ctxt->disableSAX = 1;
6520 } else {
6521 NEXT;
6522 }
6523
6524 /*
6525 * Parse up to the end of the conditionnal section
6526 * But disable SAX event generating DTD building in the meantime
6527 */
6528 state = ctxt->disableSAX;
6529 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6530 (NXT(2) != '>'))) {
6531 const xmlChar *check = CUR_PTR;
6532 int cons = ctxt->input->consumed;
6533 int tok = ctxt->token;
6534
6535 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6536 xmlParseConditionalSections(ctxt);
6537 } else if (IS_BLANK(CUR)) {
6538 NEXT;
6539 } else if (RAW == '%') {
6540 xmlParsePEReference(ctxt);
6541 } else
6542 xmlParseMarkupDecl(ctxt);
6543
6544 /*
6545 * Pop-up of finished entities.
6546 */
6547 while ((RAW == 0) && (ctxt->inputNr > 1))
6548 xmlPopInput(ctxt);
6549
6550 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6551 (tok == ctxt->token)) {
6552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6553 ctxt->sax->error(ctxt->userData,
6554 "Content error in the external subset\n");
6555 ctxt->wellFormed = 0;
6556 ctxt->disableSAX = 1;
6557 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6558 break;
6559 }
6560 }
6561 ctxt->disableSAX = state;
6562 } else {
6563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6564 ctxt->sax->error(ctxt->userData,
6565 "XML conditional section INCLUDE or IGNORE keyword expected\n");
6566 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6567 ctxt->wellFormed = 0;
6568 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006569 }
6570
Daniel Veillardcf461992000-03-14 18:30:20 +00006571 if (RAW == 0)
Daniel Veillard71b656e2000-01-05 14:46:17 +00006572 SHRINK;
6573
Daniel Veillardcf461992000-03-14 18:30:20 +00006574 if (RAW == 0) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6576 ctxt->sax->error(ctxt->userData,
6577 "XML conditional section not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006578 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006579 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006580 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006581 } else {
6582 SKIP(3);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006583 }
6584}
6585
6586/**
Daniel Veillard00fdf371999-10-08 09:40:39 +00006587 * xmlParseExternalSubset:
Daniel Veillard011b63c1999-06-02 17:44:04 +00006588 * @ctxt: an XML parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00006589 * @ExternalID: the external identifier
6590 * @SystemID: the system identifier (or URL)
Daniel Veillard011b63c1999-06-02 17:44:04 +00006591 *
6592 * parse Markup declarations from an external subset
6593 *
6594 * [30] extSubset ::= textDecl? extSubsetDecl
6595 *
6596 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00006597 */
6598void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006599xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6600 const xmlChar *SystemID) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00006601 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006602 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard011b63c1999-06-02 17:44:04 +00006603 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6604 (NXT(4) == 'l')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006605 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006606 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6607 /*
6608 * The XML REC instructs us to stop parsing right here
6609 */
6610 ctxt->instate = XML_PARSER_EOF;
6611 return;
6612 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006613 }
6614 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006615 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00006616 }
6617 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6618 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6619
Daniel Veillardb05deb71999-08-10 19:04:08 +00006620 ctxt->instate = XML_PARSER_DTD;
6621 ctxt->external = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00006622 while (((RAW == '<') && (NXT(1) == '?')) ||
6623 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard011b63c1999-06-02 17:44:04 +00006624 IS_BLANK(CUR)) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006625 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006626 int cons = ctxt->input->consumed;
Daniel Veillardcf461992000-03-14 18:30:20 +00006627 int tok = ctxt->token;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006628
Daniel Veillardcf461992000-03-14 18:30:20 +00006629 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006630 xmlParseConditionalSections(ctxt);
6631 } else if (IS_BLANK(CUR)) {
6632 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006633 } else if (RAW == '%') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006634 xmlParsePEReference(ctxt);
6635 } else
6636 xmlParseMarkupDecl(ctxt);
6637
6638 /*
6639 * Pop-up of finished entities.
6640 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006641 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006642 xmlPopInput(ctxt);
6643
Daniel Veillardcf461992000-03-14 18:30:20 +00006644 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6645 (tok == ctxt->token)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6647 ctxt->sax->error(ctxt->userData,
6648 "Content error in the external subset\n");
6649 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006650 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006651 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006652 break;
6653 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006654 }
6655
Daniel Veillardcf461992000-03-14 18:30:20 +00006656 if (RAW != 0) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6658 ctxt->sax->error(ctxt->userData,
6659 "Extra content at the end of the document\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006660 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006661 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006662 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006663 }
6664
6665}
6666
6667/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00006668 * xmlParseReference:
6669 * @ctxt: an XML parser context
6670 *
6671 * parse and handle entity references in content, depending on the SAX
6672 * interface, this may end-up in a call to character() if this is a
6673 * CharRef, a predefined entity, if there is no reference() callback.
6674 * or if the parser was asked to switch to that mode.
6675 *
6676 * [67] Reference ::= EntityRef | CharRef
6677 */
6678void
6679xmlParseReference(xmlParserCtxtPtr ctxt) {
6680 xmlEntityPtr ent;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006681 xmlChar *val;
Daniel Veillardcf461992000-03-14 18:30:20 +00006682 if (RAW != '&') return;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006683
Daniel Veillardb96e6431999-08-29 21:02:19 +00006684 if (ctxt->inputNr > 1) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006685 xmlChar cur[2] = { '&' , 0 } ;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006686
Daniel Veillardcf461992000-03-14 18:30:20 +00006687 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6688 (!ctxt->disableSAX))
Daniel Veillardb96e6431999-08-29 21:02:19 +00006689 ctxt->sax->characters(ctxt->userData, cur, 1);
6690 if (ctxt->token == '&')
6691 ctxt->token = 0;
6692 else {
6693 SKIP(1);
6694 }
6695 return;
6696 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006697 if (NXT(1) == '#') {
Daniel Veillardcf461992000-03-14 18:30:20 +00006698 int i = 0;
6699 xmlChar out[10];
6700 int hex = NXT(2);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006701 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006702
Daniel Veillardbe803962000-06-28 23:40:59 +00006703 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006704 /*
6705 * So we are using non-UTF-8 buffers
6706 * Check that the char fit on 8bits, if not
6707 * generate a CharRef.
6708 */
6709 if (val <= 0xFF) {
6710 out[0] = val;
6711 out[1] = 0;
6712 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6713 (!ctxt->disableSAX))
6714 ctxt->sax->characters(ctxt->userData, out, 1);
6715 } else {
6716 if ((hex == 'x') || (hex == 'X'))
6717 sprintf((char *)out, "#x%X", val);
6718 else
6719 sprintf((char *)out, "#%d", val);
6720 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6721 (!ctxt->disableSAX))
6722 ctxt->sax->reference(ctxt->userData, out);
6723 }
6724 } else {
6725 /*
6726 * Just encode the value in UTF-8
6727 */
6728 COPY_BUF(0 ,out, i, val);
6729 out[i] = 0;
6730 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6731 (!ctxt->disableSAX))
6732 ctxt->sax->characters(ctxt->userData, out, i);
6733 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006734 } else {
6735 ent = xmlParseEntityRef(ctxt);
6736 if (ent == NULL) return;
6737 if ((ent->name != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006738 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6739 xmlNodePtr list = NULL;
6740 int ret;
6741
6742
6743 /*
6744 * The first reference to the entity trigger a parsing phase
6745 * where the ent->children is filled with the result from
6746 * the parsing.
6747 */
6748 if (ent->children == NULL) {
6749 xmlChar *value;
6750 value = ent->content;
6751
6752 /*
6753 * Check that this entity is well formed
6754 */
6755 if ((value != NULL) &&
6756 (value[1] == 0) && (value[0] == '<') &&
6757 (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6758 /*
6759 * TODO: get definite answer on this !!!
6760 * Lots of entity decls are used to declare a single
6761 * char
6762 * <!ENTITY lt "<">
6763 * Which seems to be valid since
6764 * 2.4: The ampersand character (&) and the left angle
6765 * bracket (<) may appear in their literal form only
6766 * when used ... They are also legal within the literal
6767 * entity value of an internal entity declaration;i
6768 * see "4.3.2 Well-Formed Parsed Entities".
6769 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6770 * Looking at the OASIS test suite and James Clark
6771 * tests, this is broken. However the XML REC uses
6772 * it. Is the XML REC not well-formed ????
6773 * This is a hack to avoid this problem
6774 */
6775 list = xmlNewDocText(ctxt->myDoc, value);
6776 if (list != NULL) {
6777 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6778 (ent->children == NULL)) {
6779 ent->children = list;
6780 ent->last = list;
6781 list->parent = (xmlNodePtr) ent;
6782 } else {
6783 xmlFreeNodeList(list);
6784 }
6785 } else if (list != NULL) {
6786 xmlFreeNodeList(list);
6787 }
6788 } else {
6789 /*
6790 * 4.3.2: An internal general parsed entity is well-formed
6791 * if its replacement text matches the production labeled
6792 * content.
6793 */
6794 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6795 ctxt->depth++;
6796 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
6797 ctxt->sax, NULL, ctxt->depth,
6798 value, &list);
6799 ctxt->depth--;
6800 } else if (ent->etype ==
6801 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6802 ctxt->depth++;
6803 ret = xmlParseExternalEntity(ctxt->myDoc,
6804 ctxt->sax, NULL, ctxt->depth,
6805 ent->SystemID, ent->ExternalID, &list);
6806 ctxt->depth--;
6807 } else {
6808 ret = -1;
6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6810 ctxt->sax->error(ctxt->userData,
6811 "Internal: invalid entity type\n");
6812 }
6813 if (ret == XML_ERR_ENTITY_LOOP) {
6814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6815 ctxt->sax->error(ctxt->userData,
6816 "Detected entity reference loop\n");
6817 ctxt->wellFormed = 0;
6818 ctxt->disableSAX = 1;
6819 ctxt->errNo = XML_ERR_ENTITY_LOOP;
6820 } else if ((ret == 0) && (list != NULL)) {
6821 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6822 (ent->children == NULL)) {
6823 ent->children = list;
6824 while (list != NULL) {
6825 list->parent = (xmlNodePtr) ent;
6826 if (list->next == NULL)
6827 ent->last = list;
6828 list = list->next;
6829 }
6830 } else {
6831 xmlFreeNodeList(list);
6832 }
6833 } else if (ret > 0) {
6834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6835 ctxt->sax->error(ctxt->userData,
6836 "Entity value required\n");
6837 ctxt->errNo = ret;
6838 ctxt->wellFormed = 0;
6839 ctxt->disableSAX = 1;
6840 } else if (list != NULL) {
6841 xmlFreeNodeList(list);
6842 }
6843 }
6844 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00006845 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006846 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006847 /*
6848 * Create a node.
6849 */
6850 ctxt->sax->reference(ctxt->userData, ent->name);
6851 return;
6852 } else if (ctxt->replaceEntities) {
6853 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006854
Daniel Veillardb96e6431999-08-29 21:02:19 +00006855 input = xmlNewEntityInputStream(ctxt, ent);
6856 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00006857 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6858 (RAW == '<') && (NXT(1) == '?') &&
6859 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6860 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6861 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006862 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6863 /*
6864 * The XML REC instructs us to stop parsing right here
6865 */
6866 ctxt->instate = XML_PARSER_EOF;
6867 return;
6868 }
Daniel Veillardbe803962000-06-28 23:40:59 +00006869 if (input->standalone == 1) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6871 ctxt->sax->error(ctxt->userData,
6872 "external parsed entities cannot be standalone\n");
6873 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6874 ctxt->wellFormed = 0;
6875 ctxt->disableSAX = 1;
6876 }
6877 }
6878 /*
6879 * !!! TODO: build the tree under the entity first
6880 * 1234
6881 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00006882 return;
6883 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006884 }
6885 val = ent->content;
6886 if (val == NULL) return;
6887 /*
6888 * inline the entity.
6889 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006890 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6891 (!ctxt->disableSAX))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006892 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6893 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006894}
6895
Daniel Veillard11e00581998-10-24 18:27:49 +00006896/**
6897 * xmlParseEntityRef:
6898 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00006899 *
6900 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00006901 *
6902 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00006903 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006904 * [ WFC: Entity Declared ]
6905 * In a document without any DTD, a document with only an internal DTD
6906 * subset which contains no parameter entity references, or a document
6907 * with "standalone='yes'", the Name given in the entity reference
6908 * must match that in an entity declaration, except that well-formed
6909 * documents need not declare any of the following entities: amp, lt,
6910 * gt, apos, quot. The declaration of a parameter entity must precede
6911 * any reference to it. Similarly, the declaration of a general entity
6912 * must precede any reference to it which appears in a default value in an
6913 * attribute-list declaration. Note that if entities are declared in the
6914 * external subset or in external parameter entities, a non-validating
6915 * processor is not obligated to read and process their declarations;
6916 * for such documents, the rule that an entity must be declared is a
6917 * well-formedness constraint only if standalone='yes'.
6918 *
6919 * [ WFC: Parsed Entity ]
6920 * An entity reference must not contain the name of an unparsed entity
6921 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00006922 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006923 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00006924xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006925xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006926 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00006927 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006928
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006929 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006930
Daniel Veillardcf461992000-03-14 18:30:20 +00006931 if (RAW == '&') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006932 NEXT;
6933 name = xmlParseName(ctxt);
6934 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00006936 ctxt->sax->error(ctxt->userData,
6937 "xmlParseEntityRef: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006938 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006939 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006940 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006941 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006942 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006943 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006944 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00006945 * Ask first SAX for entity resolution, otherwise try the
6946 * predefined set.
6947 */
6948 if (ctxt->sax != NULL) {
6949 if (ctxt->sax->getEntity != NULL)
6950 ent = ctxt->sax->getEntity(ctxt->userData, name);
6951 if (ent == NULL)
6952 ent = xmlGetPredefinedEntity(name);
6953 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006954 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006955 * [ WFC: Entity Declared ]
6956 * In a document without any DTD, a document with only an
6957 * internal DTD subset which contains no parameter entity
6958 * references, or a document with "standalone='yes'", the
6959 * Name given in the entity reference must match that in an
6960 * entity declaration, except that well-formed documents
6961 * need not declare any of the following entities: amp, lt,
6962 * gt, apos, quot.
6963 * The declaration of a parameter entity must precede any
6964 * reference to it.
6965 * Similarly, the declaration of a general entity must
6966 * precede any reference to it which appears in a default
6967 * value in an attribute-list declaration. Note that if
6968 * entities are declared in the external subset or in
6969 * external parameter entities, a non-validating processor
6970 * is not obligated to read and process their declarations;
6971 * for such documents, the rule that an entity must be
6972 * declared is a well-formedness constraint only if
6973 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006974 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00006975 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006976 if ((ctxt->standalone == 1) ||
6977 ((ctxt->hasExternalSubset == 0) &&
6978 (ctxt->hasPErefs == 0))) {
6979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006980 ctxt->sax->error(ctxt->userData,
6981 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006982 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006983 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006984 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006985 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006986 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6987 ctxt->sax->warning(ctxt->userData,
6988 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006989 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006990 }
6991 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006992
6993 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006994 * [ WFC: Parsed Entity ]
6995 * An entity reference must not contain the name of an
6996 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006997 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006998 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7000 ctxt->sax->error(ctxt->userData,
7001 "Entity reference to unparsed entity %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007002 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007003 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007004 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007005 }
7006
7007 /*
7008 * [ WFC: No External Entity References ]
7009 * Attribute values cannot contain direct or indirect
7010 * entity references to external entities.
7011 */
7012 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007013 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7015 ctxt->sax->error(ctxt->userData,
7016 "Attribute references external entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007017 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007018 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007019 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007020 }
7021 /*
7022 * [ WFC: No < in Attribute Values ]
7023 * The replacement text of any entity referred to directly or
7024 * indirectly in an attribute value (other than "&lt;") must
7025 * not contain a <.
7026 */
7027 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00007028 (ent != NULL) &&
7029 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00007030 (ent->content != NULL) &&
7031 (xmlStrchr(ent->content, '<'))) {
7032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7033 ctxt->sax->error(ctxt->userData,
7034 "'<' in entity '%s' is not allowed in attributes values\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007035 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007036 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007037 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007038 }
7039
7040 /*
7041 * Internal check, no parameter entities here ...
7042 */
7043 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007044 switch (ent->etype) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007045 case XML_INTERNAL_PARAMETER_ENTITY:
7046 case XML_EXTERNAL_PARAMETER_ENTITY:
7047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007048 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007049 "Attempt to reference the parameter entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007050 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007051 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007052 ctxt->disableSAX = 1;
7053 break;
7054 default:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007055 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007056 }
7057 }
7058
7059 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007060 * [ WFC: No Recursion ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00007061 * TODO A parsed entity must not contain a recursive reference
7062 * to itself, either directly or indirectly.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007063 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00007064
Daniel Veillard011b63c1999-06-02 17:44:04 +00007065 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007067 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007068 "xmlParseEntityRef: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007069 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007070 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007071 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007072 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00007073 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007074 }
7075 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007076 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007077}
Daniel Veillard10a2c651999-12-12 13:03:50 +00007078/**
7079 * xmlParseStringEntityRef:
7080 * @ctxt: an XML parser context
7081 * @str: a pointer to an index in the string
7082 *
7083 * parse ENTITY references declarations, but this version parses it from
7084 * a string value.
7085 *
7086 * [68] EntityRef ::= '&' Name ';'
7087 *
7088 * [ WFC: Entity Declared ]
7089 * In a document without any DTD, a document with only an internal DTD
7090 * subset which contains no parameter entity references, or a document
7091 * with "standalone='yes'", the Name given in the entity reference
7092 * must match that in an entity declaration, except that well-formed
7093 * documents need not declare any of the following entities: amp, lt,
7094 * gt, apos, quot. The declaration of a parameter entity must precede
7095 * any reference to it. Similarly, the declaration of a general entity
7096 * must precede any reference to it which appears in a default value in an
7097 * attribute-list declaration. Note that if entities are declared in the
7098 * external subset or in external parameter entities, a non-validating
7099 * processor is not obligated to read and process their declarations;
7100 * for such documents, the rule that an entity must be declared is a
7101 * well-formedness constraint only if standalone='yes'.
7102 *
7103 * [ WFC: Parsed Entity ]
7104 * An entity reference must not contain the name of an unparsed entity
7105 *
7106 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7107 * is updated to the current location in the string.
7108 */
7109xmlEntityPtr
7110xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7111 xmlChar *name;
7112 const xmlChar *ptr;
7113 xmlChar cur;
7114 xmlEntityPtr ent = NULL;
7115
Daniel Veillardcf461992000-03-14 18:30:20 +00007116 if ((str == NULL) || (*str == NULL))
7117 return(NULL);
Daniel Veillard10a2c651999-12-12 13:03:50 +00007118 ptr = *str;
7119 cur = *ptr;
7120 if (cur == '&') {
7121 ptr++;
7122 cur = *ptr;
7123 name = xmlParseStringName(ctxt, &ptr);
7124 if (name == NULL) {
7125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7126 ctxt->sax->error(ctxt->userData,
7127 "xmlParseEntityRef: no name\n");
7128 ctxt->errNo = XML_ERR_NAME_REQUIRED;
7129 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007130 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007131 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007132 if (*ptr == ';') {
7133 ptr++;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007134 /*
7135 * Ask first SAX for entity resolution, otherwise try the
7136 * predefined set.
7137 */
7138 if (ctxt->sax != NULL) {
7139 if (ctxt->sax->getEntity != NULL)
7140 ent = ctxt->sax->getEntity(ctxt->userData, name);
7141 if (ent == NULL)
7142 ent = xmlGetPredefinedEntity(name);
7143 }
7144 /*
7145 * [ WFC: Entity Declared ]
7146 * In a document without any DTD, a document with only an
7147 * internal DTD subset which contains no parameter entity
7148 * references, or a document with "standalone='yes'", the
7149 * Name given in the entity reference must match that in an
7150 * entity declaration, except that well-formed documents
7151 * need not declare any of the following entities: amp, lt,
7152 * gt, apos, quot.
7153 * The declaration of a parameter entity must precede any
7154 * reference to it.
7155 * Similarly, the declaration of a general entity must
7156 * precede any reference to it which appears in a default
7157 * value in an attribute-list declaration. Note that if
7158 * entities are declared in the external subset or in
7159 * external parameter entities, a non-validating processor
7160 * is not obligated to read and process their declarations;
7161 * for such documents, the rule that an entity must be
7162 * declared is a well-formedness constraint only if
7163 * standalone='yes'.
7164 */
7165 if (ent == NULL) {
7166 if ((ctxt->standalone == 1) ||
7167 ((ctxt->hasExternalSubset == 0) &&
7168 (ctxt->hasPErefs == 0))) {
7169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7170 ctxt->sax->error(ctxt->userData,
7171 "Entity '%s' not defined\n", name);
7172 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7173 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007174 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007175 } else {
7176 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7177 ctxt->sax->warning(ctxt->userData,
7178 "Entity '%s' not defined\n", name);
7179 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
7180 }
7181 }
7182
7183 /*
7184 * [ WFC: Parsed Entity ]
7185 * An entity reference must not contain the name of an
7186 * unparsed entity
7187 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007188 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7190 ctxt->sax->error(ctxt->userData,
7191 "Entity reference to unparsed entity %s\n", name);
7192 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
7193 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007194 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007195 }
7196
7197 /*
7198 * [ WFC: No External Entity References ]
7199 * Attribute values cannot contain direct or indirect
7200 * entity references to external entities.
7201 */
7202 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007203 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7205 ctxt->sax->error(ctxt->userData,
7206 "Attribute references external entity '%s'\n", name);
7207 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
7208 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007209 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007210 }
7211 /*
7212 * [ WFC: No < in Attribute Values ]
7213 * The replacement text of any entity referred to directly or
7214 * indirectly in an attribute value (other than "&lt;") must
7215 * not contain a <.
7216 */
7217 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7218 (ent != NULL) &&
7219 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
7220 (ent->content != NULL) &&
7221 (xmlStrchr(ent->content, '<'))) {
7222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7223 ctxt->sax->error(ctxt->userData,
7224 "'<' in entity '%s' is not allowed in attributes values\n", name);
7225 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
7226 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007227 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007228 }
7229
7230 /*
7231 * Internal check, no parameter entities here ...
7232 */
7233 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007234 switch (ent->etype) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007235 case XML_INTERNAL_PARAMETER_ENTITY:
7236 case XML_EXTERNAL_PARAMETER_ENTITY:
7237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7238 ctxt->sax->error(ctxt->userData,
7239 "Attempt to reference the parameter entity '%s'\n", name);
7240 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
7241 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007242 ctxt->disableSAX = 1;
7243 break;
7244 default:
Daniel Veillard10a2c651999-12-12 13:03:50 +00007245 break;
7246 }
7247 }
7248
7249 /*
7250 * [ WFC: No Recursion ]
7251 * TODO A parsed entity must not contain a recursive reference
7252 * to itself, either directly or indirectly.
7253 */
7254
7255 } else {
7256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7257 ctxt->sax->error(ctxt->userData,
7258 "xmlParseEntityRef: expecting ';'\n");
7259 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7260 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007261 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007262 }
7263 xmlFree(name);
7264 }
7265 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007266 *str = ptr;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007267 return(ent);
7268}
Daniel Veillard260a68f1998-08-13 03:39:55 +00007269
Daniel Veillard11e00581998-10-24 18:27:49 +00007270/**
7271 * xmlParsePEReference:
7272 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00007273 *
7274 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00007275 * The entity content is handled directly by pushing it's content as
7276 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007277 *
7278 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00007279 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007280 * [ WFC: No Recursion ]
7281 * TODO A parsed entity must not contain a recursive
7282 * reference to itself, either directly or indirectly.
7283 *
7284 * [ WFC: Entity Declared ]
7285 * In a document without any DTD, a document with only an internal DTD
7286 * subset which contains no parameter entity references, or a document
7287 * with "standalone='yes'", ... ... The declaration of a parameter
7288 * entity must precede any reference to it...
7289 *
7290 * [ VC: Entity Declared ]
7291 * In a document with an external subset or external parameter entities
7292 * with "standalone='no'", ... ... The declaration of a parameter entity
7293 * must precede any reference to it...
7294 *
7295 * [ WFC: In DTD ]
7296 * Parameter-entity references may only appear in the DTD.
7297 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007298 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00007299void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007300xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007301 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00007302 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00007303 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007304
Daniel Veillardcf461992000-03-14 18:30:20 +00007305 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007306 NEXT;
7307 name = xmlParseName(ctxt);
7308 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007310 ctxt->sax->error(ctxt->userData,
7311 "xmlParsePEReference: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007312 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007313 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007314 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007315 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007316 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007317 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007318 if ((ctxt->sax != NULL) &&
7319 (ctxt->sax->getParameterEntity != NULL))
7320 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7321 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007322 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007323 /*
7324 * [ WFC: Entity Declared ]
7325 * In a document without any DTD, a document with only an
7326 * internal DTD subset which contains no parameter entity
7327 * references, or a document with "standalone='yes'", ...
7328 * ... The declaration of a parameter entity must precede
7329 * any reference to it...
7330 */
7331 if ((ctxt->standalone == 1) ||
7332 ((ctxt->hasExternalSubset == 0) &&
7333 (ctxt->hasPErefs == 0))) {
7334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7335 ctxt->sax->error(ctxt->userData,
7336 "PEReference: %%%s; not found\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007337 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007338 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007339 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007340 } else {
7341 /*
7342 * [ VC: Entity Declared ]
7343 * In a document with an external subset or external
7344 * parameter entities with "standalone='no'", ...
7345 * ... The declaration of a parameter entity must precede
7346 * any reference to it...
7347 */
7348 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7349 ctxt->sax->warning(ctxt->userData,
7350 "PEReference: %%%s; not found\n", name);
7351 ctxt->valid = 0;
7352 }
Daniel Veillardccb09631998-10-27 06:21:04 +00007353 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007354 /*
7355 * Internal checking in case the entity quest barfed
7356 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007357 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7358 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007359 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7360 ctxt->sax->warning(ctxt->userData,
7361 "Internal: %%%s; is not a parameter entity\n", name);
7362 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007363 /*
7364 * TODO !!!
7365 * handle the extra spaces added before and after
7366 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7367 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00007368 input = xmlNewEntityInputStream(ctxt, entity);
7369 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00007370 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7371 (RAW == '<') && (NXT(1) == '?') &&
7372 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7373 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7374 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007375 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7376 /*
7377 * The XML REC instructs us to stop parsing
7378 * right here
7379 */
7380 ctxt->instate = XML_PARSER_EOF;
7381 xmlFree(name);
7382 return;
7383 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007384 }
7385 if (ctxt->token == 0)
7386 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00007387 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007388 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00007389 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007390 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007392 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007393 "xmlParsePEReference: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007394 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007395 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007396 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007397 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00007398 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007399 }
7400 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007401}
7402
Daniel Veillard11e00581998-10-24 18:27:49 +00007403/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00007404 * xmlParseStringPEReference:
7405 * @ctxt: an XML parser context
7406 * @str: a pointer to an index in the string
7407 *
7408 * parse PEReference declarations
7409 *
7410 * [69] PEReference ::= '%' Name ';'
7411 *
7412 * [ WFC: No Recursion ]
7413 * TODO A parsed entity must not contain a recursive
7414 * reference to itself, either directly or indirectly.
7415 *
7416 * [ WFC: Entity Declared ]
7417 * In a document without any DTD, a document with only an internal DTD
7418 * subset which contains no parameter entity references, or a document
7419 * with "standalone='yes'", ... ... The declaration of a parameter
7420 * entity must precede any reference to it...
7421 *
7422 * [ VC: Entity Declared ]
7423 * In a document with an external subset or external parameter entities
7424 * with "standalone='no'", ... ... The declaration of a parameter entity
7425 * must precede any reference to it...
7426 *
7427 * [ WFC: In DTD ]
7428 * Parameter-entity references may only appear in the DTD.
7429 * NOTE: misleading but this is handled.
7430 *
7431 * Returns the string of the entity content.
7432 * str is updated to the current value of the index
7433 */
7434xmlEntityPtr
7435xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7436 const xmlChar *ptr;
7437 xmlChar cur;
7438 xmlChar *name;
7439 xmlEntityPtr entity = NULL;
7440
7441 if ((str == NULL) || (*str == NULL)) return(NULL);
7442 ptr = *str;
7443 cur = *ptr;
7444 if (cur == '%') {
7445 ptr++;
7446 cur = *ptr;
7447 name = xmlParseStringName(ctxt, &ptr);
7448 if (name == NULL) {
7449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7450 ctxt->sax->error(ctxt->userData,
7451 "xmlParseStringPEReference: no name\n");
7452 ctxt->errNo = XML_ERR_NAME_REQUIRED;
7453 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007454 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007455 } else {
7456 cur = *ptr;
7457 if (cur == ';') {
7458 ptr++;
7459 cur = *ptr;
7460 if ((ctxt->sax != NULL) &&
7461 (ctxt->sax->getParameterEntity != NULL))
7462 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7463 name);
7464 if (entity == NULL) {
7465 /*
7466 * [ WFC: Entity Declared ]
7467 * In a document without any DTD, a document with only an
7468 * internal DTD subset which contains no parameter entity
7469 * references, or a document with "standalone='yes'", ...
7470 * ... The declaration of a parameter entity must precede
7471 * any reference to it...
7472 */
7473 if ((ctxt->standalone == 1) ||
7474 ((ctxt->hasExternalSubset == 0) &&
7475 (ctxt->hasPErefs == 0))) {
7476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7477 ctxt->sax->error(ctxt->userData,
7478 "PEReference: %%%s; not found\n", name);
7479 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7480 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007481 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007482 } else {
7483 /*
7484 * [ VC: Entity Declared ]
7485 * In a document with an external subset or external
7486 * parameter entities with "standalone='no'", ...
7487 * ... The declaration of a parameter entity must
7488 * precede any reference to it...
7489 */
7490 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7491 ctxt->sax->warning(ctxt->userData,
7492 "PEReference: %%%s; not found\n", name);
7493 ctxt->valid = 0;
7494 }
7495 } else {
7496 /*
7497 * Internal checking in case the entity quest barfed
7498 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007499 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7500 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007501 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7502 ctxt->sax->warning(ctxt->userData,
7503 "Internal: %%%s; is not a parameter entity\n", name);
7504 }
7505 }
7506 ctxt->hasPErefs = 1;
7507 } else {
7508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7509 ctxt->sax->error(ctxt->userData,
7510 "xmlParseStringPEReference: expecting ';'\n");
7511 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7512 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007513 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007514 }
7515 xmlFree(name);
7516 }
7517 }
7518 *str = ptr;
7519 return(entity);
7520}
7521
7522/**
Daniel Veillardcf461992000-03-14 18:30:20 +00007523 * xmlParseDocTypeDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00007524 * @ctxt: an XML parser context
7525 *
7526 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00007527 *
7528 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7529 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00007530 *
7531 * [ VC: Root Element Type ]
7532 * The Name in the document type declaration must match the element
7533 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007534 */
7535
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007536void
7537xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007538 xmlChar *name = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007539 xmlChar *ExternalID = NULL;
7540 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007541
7542 /*
7543 * We know that '<!DOCTYPE' has been detected.
7544 */
7545 SKIP(9);
7546
7547 SKIP_BLANKS;
7548
7549 /*
7550 * Parse the DOCTYPE name.
7551 */
7552 name = xmlParseName(ctxt);
7553 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007555 ctxt->sax->error(ctxt->userData,
7556 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007557 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007558 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007559 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007560 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007561 ctxt->intSubName = name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007562
7563 SKIP_BLANKS;
7564
7565 /*
7566 * Check for SystemID and ExternalID
7567 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00007568 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007569
7570 if ((URI != NULL) || (ExternalID != NULL)) {
7571 ctxt->hasExternalSubset = 1;
7572 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007573 ctxt->extSubURI = URI;
7574 ctxt->extSubSystem = ExternalID;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007575
Daniel Veillard260a68f1998-08-13 03:39:55 +00007576 SKIP_BLANKS;
7577
Daniel Veillard011b63c1999-06-02 17:44:04 +00007578 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00007579 * Create and update the internal subset.
Daniel Veillard011b63c1999-06-02 17:44:04 +00007580 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007581 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7582 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007583 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007584
7585 /*
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007586 * Is there any internal subset declarations ?
7587 * they are handled separately in xmlParseInternalSubset()
7588 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007589 if (RAW == '[')
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007590 return;
7591
7592 /*
7593 * We should be at the end of the DOCTYPE declaration.
7594 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007595 if (RAW != '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7597 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7598 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007599 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007600 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7601 }
7602 NEXT;
7603}
7604
/**
 * xmlParseInternalSubset:
 * @ctxt:  an XML parser context
 *
 * parse the internal subset declaration
 *
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
 */
7613
7614void
7615xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7616 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007617 * Is there any DTD definition ?
7618 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007619 if (RAW == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007620 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007621 NEXT;
7622 /*
7623 * Parse the succession of Markup declarations and
7624 * PEReferences.
7625 * Subsequence (markupdecl | PEReference | S)*
7626 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007627 while (RAW != ']') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007628 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007629 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007630
7631 SKIP_BLANKS;
7632 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00007633 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007634
Daniel Veillard011b63c1999-06-02 17:44:04 +00007635 /*
7636 * Pop-up of finished entities.
7637 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007638 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00007639 xmlPopInput(ctxt);
7640
Daniel Veillardc26087b1999-08-30 11:23:51 +00007641 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00007642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7643 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007644 "xmlParseInternalSubset: error detected in Markup declaration\n");
Daniel Veillardb96e6431999-08-29 21:02:19 +00007645 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007646 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007647 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007648 break;
7649 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007650 }
Daniel Veillard36650692000-07-21 15:16:39 +00007651 if (RAW == ']') {
7652 NEXT;
7653 SKIP_BLANKS;
7654 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007655 }
7656
7657 /*
7658 * We should be at the end of the DOCTYPE declaration.
7659 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007660 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007662 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007663 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007664 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007665 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007666 }
7667 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007668}
7669
Daniel Veillard11e00581998-10-24 18:27:49 +00007670/**
7671 * xmlParseAttribute:
7672 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007673 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00007674 *
7675 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00007676 *
7677 * [41] Attribute ::= Name Eq AttValue
7678 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007679 * [ WFC: No External Entity References ]
7680 * Attribute values cannot contain direct or indirect entity references
7681 * to external entities.
7682 *
7683 * [ WFC: No < in Attribute Values ]
7684 * The replacement text of any entity referred to directly or indirectly in
7685 * an attribute value (other than "&lt;") must not contain a <.
7686 *
7687 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00007688 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00007689 * declared for it.
7690 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00007691 * [25] Eq ::= S? '=' S?
7692 *
7693 * With namespace:
7694 *
7695 * [NS 11] Attribute ::= QName Eq AttValue
7696 *
7697 * Also the case QName == xmlns:??? is handled independently as a namespace
7698 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00007699 *
Daniel Veillard517752b1999-04-05 12:20:10 +00007700 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007701 */
7702
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007703xmlChar *
7704xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7705 xmlChar *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007706
Daniel Veillard517752b1999-04-05 12:20:10 +00007707 *value = NULL;
7708 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007709 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007711 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007712 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007713 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007714 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillardccb09631998-10-27 06:21:04 +00007715 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007716 }
7717
7718 /*
7719 * read the value
7720 */
7721 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007722 if (RAW == '=') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007723 NEXT;
7724 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00007725 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007726 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007727 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007729 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007730 "Specification mandate value for attribute %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007731 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007732 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007733 ctxt->disableSAX = 1;
7734 xmlFree(name);
Daniel Veillardccb09631998-10-27 06:21:04 +00007735 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007736 }
7737
Daniel Veillardcf461992000-03-14 18:30:20 +00007738 /*
7739 * Check that xml:lang conforms to the specification
7740 */
7741 if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7742 if (!xmlCheckLanguageID(val)) {
7743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7744 ctxt->sax->error(ctxt->userData,
7745 "Invalid value for xml:lang : %s\n", val);
7746 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7747 ctxt->wellFormed = 0;
7748 ctxt->disableSAX = 1;
7749 }
7750 }
7751
7752 /*
7753 * Check that xml:space conforms to the specification
7754 */
7755 if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7756 if (!xmlStrcmp(val, BAD_CAST "default"))
7757 *(ctxt->space) = 0;
7758 else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7759 *(ctxt->space) = 1;
7760 else {
7761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7762 ctxt->sax->error(ctxt->userData,
7763"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7764 val);
7765 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7766 ctxt->wellFormed = 0;
7767 ctxt->disableSAX = 1;
7768 }
7769 }
7770
Daniel Veillard517752b1999-04-05 12:20:10 +00007771 *value = val;
7772 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007773}
7774
Daniel Veillard11e00581998-10-24 18:27:49 +00007775/**
7776 * xmlParseStartTag:
7777 * @ctxt: an XML parser context
7778 *
7779 * parse a start of tag either for rule element or
7780 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007781 *
7782 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7783 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007784 * [ WFC: Unique Att Spec ]
7785 * No attribute name may appear more than once in the same start-tag or
7786 * empty-element tag.
7787 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00007788 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7789 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007790 * [ WFC: Unique Att Spec ]
7791 * No attribute name may appear more than once in the same start-tag or
7792 * empty-element tag.
7793 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00007794 * With namespace:
7795 *
7796 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7797 *
7798 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00007799 *
Daniel Veillard06047432000-04-24 11:33:38 +00007800 * Returns the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00007801 */
7802
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007803xmlChar *
Daniel Veillard1e346af1999-02-22 10:33:01 +00007804xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007805 xmlChar *name;
7806 xmlChar *attname;
7807 xmlChar *attvalue;
7808 const xmlChar **atts = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00007809 int nbatts = 0;
7810 int maxatts = 0;
7811 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007812
Daniel Veillardcf461992000-03-14 18:30:20 +00007813 if (RAW != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007814 NEXT;
7815
Daniel Veillard517752b1999-04-05 12:20:10 +00007816 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007817 if (name == NULL) {
7818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007819 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007820 "xmlParseStartTag: invalid element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007821 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007822 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007823 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00007824 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007825 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007826
7827 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007828 * Now parse the attributes, it ends up with the ending
7829 *
7830 * (S Attribute)* S?
7831 */
7832 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007833 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007834
7835 while ((IS_CHAR(RAW)) &&
7836 (RAW != '>') &&
7837 ((RAW != '/') || (NXT(1) != '>'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007838 const xmlChar *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007839 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007840
Daniel Veillard517752b1999-04-05 12:20:10 +00007841 attname = xmlParseAttribute(ctxt, &attvalue);
7842 if ((attname != NULL) && (attvalue != NULL)) {
7843 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007844 * [ WFC: Unique Att Spec ]
7845 * No attribute name may appear more than once in the same
7846 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00007847 */
7848 for (i = 0; i < nbatts;i += 2) {
7849 if (!xmlStrcmp(atts[i], attname)) {
7850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00007851 ctxt->sax->error(ctxt->userData,
7852 "Attribute %s redefined\n",
7853 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00007854 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007855 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007856 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard6454aec1999-09-02 22:04:43 +00007857 xmlFree(attname);
7858 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007859 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00007860 }
7861 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007862
Daniel Veillard517752b1999-04-05 12:20:10 +00007863 /*
7864 * Add the pair to atts
7865 */
7866 if (atts == NULL) {
7867 maxatts = 10;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007868 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00007869 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007870 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007871 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00007872 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00007873 }
Daniel Veillard51e3b151999-11-12 17:02:31 +00007874 } else if (nbatts + 4 > maxatts) {
Daniel Veillard517752b1999-04-05 12:20:10 +00007875 maxatts *= 2;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007876 atts = (const xmlChar **) xmlRealloc(atts,
7877 maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00007878 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007879 fprintf(stderr, "realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007880 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00007881 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00007882 }
7883 }
7884 atts[nbatts++] = attname;
7885 atts[nbatts++] = attvalue;
7886 atts[nbatts] = NULL;
7887 atts[nbatts + 1] = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00007888 } else {
7889 if (attname != NULL)
7890 xmlFree(attname);
7891 if (attvalue != NULL)
7892 xmlFree(attvalue);
Daniel Veillard517752b1999-04-05 12:20:10 +00007893 }
7894
Daniel Veillardb96e6431999-08-29 21:02:19 +00007895failed:
Daniel Veillardcf461992000-03-14 18:30:20 +00007896
7897 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7898 break;
7899 if (!IS_BLANK(RAW)) {
7900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7901 ctxt->sax->error(ctxt->userData,
7902 "attributes construct error\n");
7903 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7904 ctxt->wellFormed = 0;
7905 ctxt->disableSAX = 1;
7906 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007907 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007908 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007910 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00007911 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007912 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007913 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007914 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007915 break;
7916 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007917 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007918 }
7919
7920 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007921 * SAX: Start of Element !
7922 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007923 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7924 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007925 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00007926
Daniel Veillard517752b1999-04-05 12:20:10 +00007927 if (atts != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007928 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
Daniel Veillard6454aec1999-09-02 22:04:43 +00007929 xmlFree(atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00007930 }
Daniel Veillard14fff061999-06-22 21:49:07 +00007931 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007932}
7933
Daniel Veillard11e00581998-10-24 18:27:49 +00007934/**
7935 * xmlParseEndTag:
7936 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00007937 *
7938 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00007939 *
7940 * [42] ETag ::= '</' Name S? '>'
7941 *
7942 * With namespace
7943 *
Daniel Veillard517752b1999-04-05 12:20:10 +00007944 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00007945 */
7946
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007947void
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007948xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007949 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007950 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007951
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007952 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007953 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007955 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007956 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007957 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007958 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007959 return;
7960 }
7961 SKIP(2);
7962
Daniel Veillard517752b1999-04-05 12:20:10 +00007963 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007964
7965 /*
7966 * We should definitely be at the ending "S? '>'" part
7967 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007968 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007969 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007970 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007972 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007973 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007974 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007975 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007976 } else
7977 NEXT;
7978
Daniel Veillard517752b1999-04-05 12:20:10 +00007979 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007980 * [ WFC: Element Type Match ]
7981 * The Name in an element's end-tag must match the element type in the
7982 * start-tag.
7983 *
Daniel Veillard14fff061999-06-22 21:49:07 +00007984 */
Daniel Veillardda07c342000-01-25 18:31:22 +00007985 if ((name == NULL) || (ctxt->name == NULL) ||
7986 (xmlStrcmp(name, ctxt->name))) {
7987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7988 if ((name != NULL) && (ctxt->name != NULL)) {
7989 ctxt->sax->error(ctxt->userData,
7990 "Opening and ending tag mismatch: %s and %s\n",
7991 ctxt->name, name);
7992 } else if (ctxt->name != NULL) {
7993 ctxt->sax->error(ctxt->userData,
7994 "Ending tag eror for: %s\n", ctxt->name);
7995 } else {
7996 ctxt->sax->error(ctxt->userData,
7997 "Ending tag error: internal error ???\n");
7998 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007999
Daniel Veillardda07c342000-01-25 18:31:22 +00008000 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008001 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
Daniel Veillard14fff061999-06-22 21:49:07 +00008002 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008003 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00008004 }
8005
8006 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00008007 * SAX: End of Tag
8008 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008009 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8010 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00008011 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00008012
8013 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00008014 xmlFree(name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008015 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008016 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008017 if (oldname != NULL) {
8018#ifdef DEBUG_STACK
8019 fprintf(stderr,"Close: popped %s\n", oldname);
8020#endif
8021 xmlFree(oldname);
8022 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008023 return;
8024}
8025
Daniel Veillard11e00581998-10-24 18:27:49 +00008026/**
8027 * xmlParseCDSect:
8028 * @ctxt: an XML parser context
8029 *
8030 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008031 *
8032 * [18] CDSect ::= CDStart CData CDEnd
8033 *
8034 * [19] CDStart ::= '<![CDATA['
8035 *
8036 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8037 *
8038 * [21] CDEnd ::= ']]>'
8039 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008040void
8041xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008042 xmlChar *buf = NULL;
8043 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008044 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00008045 int r, rl;
8046 int s, sl;
8047 int cur, l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008048
Daniel Veillardb05deb71999-08-10 19:04:08 +00008049 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008050 (NXT(2) == '[') && (NXT(3) == 'C') &&
8051 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8052 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8053 (NXT(8) == '[')) {
8054 SKIP(9);
8055 } else
8056 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008057
8058 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillardcf461992000-03-14 18:30:20 +00008059 r = CUR_CHAR(rl);
8060 if (!IS_CHAR(r)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008062 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00008063 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008064 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008065 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008066 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008067 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008068 return;
8069 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008070 NEXTL(rl);
8071 s = CUR_CHAR(sl);
8072 if (!IS_CHAR(s)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008074 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00008075 "CData section not finished\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008076 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008077 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008078 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008079 ctxt->instate = XML_PARSER_CONTENT;
8080 return;
8081 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008082 NEXTL(sl);
8083 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00008084 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8085 if (buf == NULL) {
8086 fprintf(stderr, "malloc of %d byte failed\n", size);
8087 return;
8088 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00008089 while (IS_CHAR(cur) &&
8090 ((r != ']') || (s != ']') || (cur != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008091 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008092 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008093 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00008094 if (buf == NULL) {
8095 fprintf(stderr, "realloc of %d byte failed\n", size);
8096 return;
8097 }
8098 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008099 COPY_BUF(rl,buf,len,r);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008100 r = s;
Daniel Veillardcf461992000-03-14 18:30:20 +00008101 rl = sl;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008102 s = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00008103 sl = l;
8104 NEXTL(l);
8105 cur = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008106 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008107 buf[len] = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008108 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008109 if (cur != '>') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008111 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00008112 "CData section not finished\n%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008113 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008114 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008115 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00008116 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008117 return;
8118 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008119 NEXTL(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008120
8121 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00008122 * Ok the buffer is to be consumed as cdata.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008123 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008124 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008125 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillard10a2c651999-12-12 13:03:50 +00008126 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008127 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008128 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008129}
8130
Daniel Veillard11e00581998-10-24 18:27:49 +00008131/**
8132 * xmlParseContent:
8133 * @ctxt: an XML parser context
8134 *
8135 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00008136 *
8137 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8138 */
8139
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008140void
8141xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008142 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008143 while (((RAW != 0) || (ctxt->token != 0)) &&
8144 ((RAW != '<') || (NXT(1) != '/'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008145 const xmlChar *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008146 int cons = ctxt->input->consumed;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008147 xmlChar tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008148
8149 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008150 * Handle possible processed charrefs.
8151 */
8152 if (ctxt->token != 0) {
8153 xmlParseCharData(ctxt, 0);
8154 }
8155 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00008156 * First case : a Processing Instruction.
8157 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008158 else if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008159 xmlParsePI(ctxt);
8160 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008161
Daniel Veillard260a68f1998-08-13 03:39:55 +00008162 /*
8163 * Second case : a CDSection
8164 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008165 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008166 (NXT(2) == '[') && (NXT(3) == 'C') &&
8167 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8168 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8169 (NXT(8) == '[')) {
8170 xmlParseCDSect(ctxt);
8171 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008172
Daniel Veillard260a68f1998-08-13 03:39:55 +00008173 /*
8174 * Third case : a comment
8175 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008176 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008177 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00008178 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008179 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008180 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008181
Daniel Veillard260a68f1998-08-13 03:39:55 +00008182 /*
8183 * Fourth case : a sub-element.
8184 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008185 else if (RAW == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00008186 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008187 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008188
Daniel Veillard260a68f1998-08-13 03:39:55 +00008189 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00008190 * Fifth case : a reference. If if has not been resolved,
8191 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00008192 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00008193
Daniel Veillardcf461992000-03-14 18:30:20 +00008194 else if (RAW == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008195 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008196 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008197
Daniel Veillard260a68f1998-08-13 03:39:55 +00008198 /*
8199 * Last case, text. Note that References are handled directly.
8200 */
8201 else {
8202 xmlParseCharData(ctxt, 0);
8203 }
8204
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008205 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008206 /*
8207 * Pop-up of finished entities.
8208 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008209 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillardbc50b591999-03-01 12:28:53 +00008210 xmlPopInput(ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +00008211 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008212
Daniel Veillardb96e6431999-08-29 21:02:19 +00008213 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8214 (tok == ctxt->token)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008216 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008217 "detected an error in element content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008218 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008219 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008220 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008221 break;
8222 }
8223 }
8224}
8225
Daniel Veillard11e00581998-10-24 18:27:49 +00008226/**
8227 * xmlParseElement:
8228 * @ctxt: an XML parser context
8229 *
8230 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00008231 *
8232 * [39] element ::= EmptyElemTag | STag content ETag
8233 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008234 * [ WFC: Element Type Match ]
8235 * The Name in an element's end-tag must match the element type in the
8236 * start-tag.
8237 *
8238 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00008239 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00008240 * where the Name matches the element type and one of the following holds:
8241 * - The declaration matches EMPTY and the element has no content.
8242 * - The declaration matches children and the sequence of child elements
8243 * belongs to the language generated by the regular expression in the
8244 * content model, with optional white space (characters matching the
8245 * nonterminal S) between each pair of child elements.
8246 * - The declaration matches Mixed and the content consists of character
8247 * data and child elements whose types match names in the content model.
8248 * - The declaration matches ANY, and the types of any child elements have
8249 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008250 */
8251
Daniel Veillard517752b1999-04-05 12:20:10 +00008252void
Daniel Veillard1e346af1999-02-22 10:33:01 +00008253xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008254 const xmlChar *openTag = CUR_PTR;
8255 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008256 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008257 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00008258 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008259
8260 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00008261 if (ctxt->record_info) {
8262 node_info.begin_pos = ctxt->input->consumed +
8263 (CUR_PTR - ctxt->input->base);
8264 node_info.begin_line = ctxt->input->line;
8265 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008266
Daniel Veillardcf461992000-03-14 18:30:20 +00008267 if (ctxt->spaceNr == 0)
8268 spacePush(ctxt, -1);
8269 else
8270 spacePush(ctxt, *ctxt->space);
8271
Daniel Veillard14fff061999-06-22 21:49:07 +00008272 name = xmlParseStartTag(ctxt);
8273 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008274 spacePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00008275 return;
8276 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008277 namePush(ctxt, name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00008278 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008279
8280 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00008281 * [ VC: Root Element Type ]
8282 * The Name in the document type declaration must match the element
8283 * type of the root element.
8284 */
8285 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00008286 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillardb05deb71999-08-10 19:04:08 +00008287 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8288
8289 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00008290 * Check for an Empty Element.
8291 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008292 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008293 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008294 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8295 (!ctxt->disableSAX))
Daniel Veillard14fff061999-06-22 21:49:07 +00008296 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008297 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008298 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008299 if (oldname != NULL) {
8300#ifdef DEBUG_STACK
8301 fprintf(stderr,"Close: popped %s\n", oldname);
8302#endif
8303 xmlFree(oldname);
8304 }
Daniel Veillard87b95392000-08-12 21:12:04 +00008305 if ( ret != NULL && ctxt->record_info ) {
8306 node_info.end_pos = ctxt->input->consumed +
8307 (CUR_PTR - ctxt->input->base);
8308 node_info.end_line = ctxt->input->line;
8309 node_info.node = ret;
8310 xmlParserAddNodeInfo(ctxt, &node_info);
8311 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008312 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008313 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008314 if (RAW == '>') {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008315 NEXT;
8316 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008318 ctxt->sax->error(ctxt->userData,
8319 "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00008320 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008321 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008322 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008323 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008324
8325 /*
8326 * end of parsing of this node.
8327 */
8328 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008329 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008330 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008331 if (oldname != NULL) {
8332#ifdef DEBUG_STACK
8333 fprintf(stderr,"Close: popped %s\n", oldname);
8334#endif
8335 xmlFree(oldname);
8336 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00008337
8338 /*
8339 * Capture end position and add node
8340 */
8341 if ( ret != NULL && ctxt->record_info ) {
8342 node_info.end_pos = ctxt->input->consumed +
8343 (CUR_PTR - ctxt->input->base);
8344 node_info.end_line = ctxt->input->line;
8345 node_info.node = ret;
8346 xmlParserAddNodeInfo(ctxt, &node_info);
8347 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008348 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008349 }
8350
8351 /*
8352 * Parse the content of the element:
8353 */
8354 xmlParseContent(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008355 if (!IS_CHAR(RAW)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008357 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00008358 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008359 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008360 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008361 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008362
8363 /*
8364 * end of parsing of this node.
8365 */
8366 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008367 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008368 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008369 if (oldname != NULL) {
8370#ifdef DEBUG_STACK
8371 fprintf(stderr,"Close: popped %s\n", oldname);
8372#endif
8373 xmlFree(oldname);
8374 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008375 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008376 }
8377
8378 /*
8379 * parse the end of tag: '</' should be here.
8380 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008381 xmlParseEndTag(ctxt);
Daniel Veillardc26087b1999-08-30 11:23:51 +00008382
8383 /*
8384 * Capture end position and add node
8385 */
8386 if ( ret != NULL && ctxt->record_info ) {
8387 node_info.end_pos = ctxt->input->consumed +
8388 (CUR_PTR - ctxt->input->base);
8389 node_info.end_line = ctxt->input->line;
8390 node_info.node = ret;
8391 xmlParserAddNodeInfo(ctxt, &node_info);
8392 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008393}
8394
Daniel Veillard11e00581998-10-24 18:27:49 +00008395/**
8396 * xmlParseVersionNum:
8397 * @ctxt: an XML parser context
8398 *
8399 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008400 *
8401 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00008402 *
8403 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008404 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008405xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008406xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008407 xmlChar *buf = NULL;
8408 int len = 0;
8409 int size = 10;
8410 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008411
Daniel Veillard10a2c651999-12-12 13:03:50 +00008412 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8413 if (buf == NULL) {
8414 fprintf(stderr, "malloc of %d byte failed\n", size);
8415 return(NULL);
8416 }
8417 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00008418 while (((cur >= 'a') && (cur <= 'z')) ||
8419 ((cur >= 'A') && (cur <= 'Z')) ||
8420 ((cur >= '0') && (cur <= '9')) ||
8421 (cur == '_') || (cur == '.') ||
8422 (cur == ':') || (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008423 if (len + 1 >= size) {
8424 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008425 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00008426 if (buf == NULL) {
8427 fprintf(stderr, "realloc of %d byte failed\n", size);
8428 return(NULL);
8429 }
8430 }
8431 buf[len++] = cur;
8432 NEXT;
8433 cur=CUR;
8434 }
8435 buf[len] = 0;
8436 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008437}
8438
Daniel Veillard11e00581998-10-24 18:27:49 +00008439/**
8440 * xmlParseVersionInfo:
8441 * @ctxt: an XML parser context
8442 *
8443 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008444 *
8445 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8446 *
8447 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00008448 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008449 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00008450 */
8451
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008452xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008453xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008454 xmlChar *version = NULL;
8455 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008456
Daniel Veillardcf461992000-03-14 18:30:20 +00008457 if ((RAW == 'v') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008458 (NXT(2) == 'r') && (NXT(3) == 's') &&
8459 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8460 (NXT(6) == 'n')) {
8461 SKIP(7);
8462 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008463 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008465 ctxt->sax->error(ctxt->userData,
8466 "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008467 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008468 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008469 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008470 return(NULL);
8471 }
8472 NEXT;
8473 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008474 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008475 NEXT;
8476 q = CUR_PTR;
8477 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008478 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008480 ctxt->sax->error(ctxt->userData,
8481 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008482 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008483 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008484 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008485 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008486 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008487 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008488 NEXT;
8489 q = CUR_PTR;
8490 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008491 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008493 ctxt->sax->error(ctxt->userData,
8494 "String not closed\n%.50s\n", q);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008495 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008496 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008497 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008498 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008499 NEXT;
8500 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008502 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008503 "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008504 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008505 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008506 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008507 }
8508 }
8509 return(version);
8510}
8511
Daniel Veillard11e00581998-10-24 18:27:49 +00008512/**
8513 * xmlParseEncName:
8514 * @ctxt: an XML parser context
8515 *
8516 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00008517 *
8518 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00008519 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008520 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008521 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008522xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008523xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008524 xmlChar *buf = NULL;
8525 int len = 0;
8526 int size = 10;
8527 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008528
Daniel Veillard10a2c651999-12-12 13:03:50 +00008529 cur = CUR;
8530 if (((cur >= 'a') && (cur <= 'z')) ||
8531 ((cur >= 'A') && (cur <= 'Z'))) {
8532 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8533 if (buf == NULL) {
8534 fprintf(stderr, "malloc of %d byte failed\n", size);
8535 return(NULL);
8536 }
8537
8538 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008539 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00008540 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00008541 while (((cur >= 'a') && (cur <= 'z')) ||
8542 ((cur >= 'A') && (cur <= 'Z')) ||
8543 ((cur >= '0') && (cur <= '9')) ||
8544 (cur == '.') || (cur == '_') ||
8545 (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008546 if (len + 1 >= size) {
8547 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008548 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00008549 if (buf == NULL) {
8550 fprintf(stderr, "realloc of %d byte failed\n", size);
8551 return(NULL);
8552 }
8553 }
8554 buf[len++] = cur;
8555 NEXT;
8556 cur = CUR;
8557 if (cur == 0) {
8558 SHRINK;
8559 GROW;
8560 cur = CUR;
8561 }
8562 }
8563 buf[len] = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008564 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008566 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008567 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008568 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008569 ctxt->errNo = XML_ERR_ENCODING_NAME;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008570 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008571 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008572}
8573
Daniel Veillard11e00581998-10-24 18:27:49 +00008574/**
8575 * xmlParseEncodingDecl:
8576 * @ctxt: an XML parser context
8577 *
8578 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00008579 *
8580 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00008581 *
8582 * TODO: this should setup the conversion filters.
8583 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008584 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008585 */
8586
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008587xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008588xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008589 xmlChar *encoding = NULL;
8590 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008591
8592 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008593 if ((RAW == 'e') && (NXT(1) == 'n') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008594 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8595 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8596 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8597 SKIP(8);
8598 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008599 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008601 ctxt->sax->error(ctxt->userData,
8602 "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008603 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008604 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008605 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008606 return(NULL);
8607 }
8608 NEXT;
8609 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008610 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008611 NEXT;
8612 q = CUR_PTR;
8613 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008614 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008616 ctxt->sax->error(ctxt->userData,
8617 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008618 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008619 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008620 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008621 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008622 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008623 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008624 NEXT;
8625 q = CUR_PTR;
8626 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008627 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008629 ctxt->sax->error(ctxt->userData,
8630 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008631 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008632 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008633 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008634 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008635 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008636 } else if (RAW == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008638 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008639 "xmlParseEncodingDecl : expected ' or \"\n");
8640 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008641 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008642 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008643 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00008644 if (encoding != NULL) {
8645 xmlCharEncoding enc;
8646 xmlCharEncodingHandlerPtr handler;
8647
8648 if (ctxt->input->encoding != NULL)
8649 xmlFree((xmlChar *) ctxt->input->encoding);
8650 ctxt->input->encoding = encoding;
8651
8652 enc = xmlParseCharEncoding((const char *) encoding);
8653 /*
8654 * registered set of known encodings
8655 */
8656 if (enc != XML_CHAR_ENCODING_ERROR) {
8657 xmlSwitchEncoding(ctxt, enc);
8658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8659 xmlFree(encoding);
8660 return(NULL);
8661 }
8662 } else {
8663 /*
8664 * fallback for unknown encodings
8665 */
8666 handler = xmlFindCharEncodingHandler((const char *) encoding);
8667 if (handler != NULL) {
8668 xmlSwitchToEncoding(ctxt, handler);
8669 } else {
8670 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8672 ctxt->sax->error(ctxt->userData,
8673 "Unsupported encoding %s\n", encoding);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00008674 return(NULL);
8675 }
8676 }
8677 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008678 }
8679 return(encoding);
8680}
8681
Daniel Veillard11e00581998-10-24 18:27:49 +00008682/**
8683 * xmlParseSDDecl:
8684 * @ctxt: an XML parser context
8685 *
8686 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00008687 *
8688 * [32] SDDecl ::= S 'standalone' Eq
8689 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00008690 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008691 * [ VC: Standalone Document Declaration ]
8692 * TODO The standalone document declaration must have the value "no"
8693 * if any external markup declarations contain declarations of:
8694 * - attributes with default values, if elements to which these
8695 * attributes apply appear in the document without specifications
8696 * of values for these attributes, or
8697 * - entities (other than amp, lt, gt, apos, quot), if references
8698 * to those entities appear in the document, or
8699 * - attributes with values subject to normalization, where the
8700 * attribute appears in the document with a value which will change
8701 * as a result of normalization, or
8702 * - element types with element content, if white space occurs directly
8703 * within any instance of those types.
8704 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008705 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00008706 */
8707
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008708int
8709xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008710 int standalone = -1;
8711
8712 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008713 if ((RAW == 's') && (NXT(1) == 't') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008714 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8715 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8716 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8717 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8718 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008719 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008720 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008722 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008723 "XML standalone declaration : expected '='\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008724 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008725 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008726 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008727 return(standalone);
8728 }
8729 NEXT;
8730 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008731 if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008732 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008733 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008734 standalone = 0;
8735 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008736 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008737 (NXT(2) == 's')) {
8738 standalone = 1;
8739 SKIP(3);
8740 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008742 ctxt->sax->error(ctxt->userData,
8743 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008744 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008745 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008746 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008747 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008748 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008750 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008751 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008752 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008753 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008754 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008755 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008756 } else if (RAW == '"'){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008757 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008758 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008759 standalone = 0;
8760 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008761 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008762 (NXT(2) == 's')) {
8763 standalone = 1;
8764 SKIP(3);
8765 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008767 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008768 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008769 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008770 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008771 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008772 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008773 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008775 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008776 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008777 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008778 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008779 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008780 NEXT;
8781 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008783 ctxt->sax->error(ctxt->userData,
8784 "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008785 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008786 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008787 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008788 }
8789 }
8790 return(standalone);
8791}
8792
Daniel Veillard11e00581998-10-24 18:27:49 +00008793/**
8794 * xmlParseXMLDecl:
8795 * @ctxt: an XML parser context
8796 *
8797 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00008798 *
8799 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8800 */
8801
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008802void
8803xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008804 xmlChar *version;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008805
8806 /*
8807 * We know that '<?xml' is here.
8808 */
8809 SKIP(5);
8810
Daniel Veillardcf461992000-03-14 18:30:20 +00008811 if (!IS_BLANK(RAW)) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008813 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008814 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008815 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008816 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008817 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008818 SKIP_BLANKS;
8819
8820 /*
8821 * We should have the VersionInfo here.
8822 */
8823 version = xmlParseVersionInfo(ctxt);
8824 if (version == NULL)
8825 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00008826 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00008827 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008828
8829 /*
8830 * We may have the encoding declaration
8831 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008832 if (!IS_BLANK(RAW)) {
8833 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008834 SKIP(2);
8835 return;
8836 }
8837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008838 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008839 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008840 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008841 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008842 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00008843 xmlParseEncodingDecl(ctxt);
8844 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8845 /*
8846 * The XML REC instructs us to stop parsing right here
8847 */
8848 return;
8849 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008850
8851 /*
8852 * We may have the standalone status.
8853 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008854 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8855 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008856 SKIP(2);
8857 return;
8858 }
8859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008860 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008861 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008862 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008863 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008864 }
8865 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008866 ctxt->input->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008867
8868 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008869 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008870 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008871 } else if (RAW == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008872 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008874 ctxt->sax->error(ctxt->userData,
8875 "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008876 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008877 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008878 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008879 NEXT;
8880 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008882 ctxt->sax->error(ctxt->userData,
8883 "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008884 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008885 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008886 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008887 MOVETO_ENDTAG(CUR_PTR);
8888 NEXT;
8889 }
8890}
8891
Daniel Veillard11e00581998-10-24 18:27:49 +00008892/**
8893 * xmlParseMisc:
8894 * @ctxt: an XML parser context
8895 *
8896 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008897 *
8898 * [27] Misc ::= Comment | PI | S
8899 */
8900
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008901void
8902xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008903 while (((RAW == '<') && (NXT(1) == '?')) ||
8904 ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008905 (NXT(2) == '-') && (NXT(3) == '-')) ||
8906 IS_BLANK(CUR)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008907 if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008908 xmlParsePI(ctxt);
8909 } else if (IS_BLANK(CUR)) {
8910 NEXT;
8911 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00008912 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008913 }
8914}
8915
Daniel Veillard11e00581998-10-24 18:27:49 +00008916/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008917 * xmlParseDocument:
Daniel Veillard11e00581998-10-24 18:27:49 +00008918 * @ctxt: an XML parser context
8919 *
8920 * parse an XML document (and build a tree if using the standard SAX
8921 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00008922 *
8923 * [1] document ::= prolog element Misc*
8924 *
8925 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00008926 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008927 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00008928 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008929 */
8930
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008931int
8932xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008933 xmlChar start[4];
8934 xmlCharEncoding enc;
8935
Daniel Veillard260a68f1998-08-13 03:39:55 +00008936 xmlDefaultSAXHandlerInit();
8937
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008938 GROW;
8939
Daniel Veillard260a68f1998-08-13 03:39:55 +00008940 /*
8941 * SAX: beginning of the document processing.
8942 */
Daniel Veillard517752b1999-04-05 12:20:10 +00008943 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00008944 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008945
Daniel Veillardcf461992000-03-14 18:30:20 +00008946 /*
8947 * Get the 4 first bytes and decode the charset
8948 * if enc != XML_CHAR_ENCODING_NONE
8949 * plug some encoding conversion routines.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008950 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008951 start[0] = RAW;
8952 start[1] = NXT(1);
8953 start[2] = NXT(2);
8954 start[3] = NXT(3);
8955 enc = xmlDetectCharEncoding(start, 4);
8956 if (enc != XML_CHAR_ENCODING_NONE) {
8957 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008958 }
8959
Daniel Veillardcf461992000-03-14 18:30:20 +00008960
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008961 if (CUR == 0) {
8962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008963 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008964 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008965 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008966 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008967 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008968
8969 /*
8970 * Check for the XMLDecl in the Prolog.
8971 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008972 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008973 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008974 (NXT(2) == 'x') && (NXT(3) == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00008975 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00008976
8977 /*
8978 * Note that we will switch encoding on the fly.
8979 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00008980 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00008981 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8982 /*
8983 * The XML REC instructs us to stop parsing right here
8984 */
8985 return(-1);
8986 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008987 ctxt->standalone = ctxt->input->standalone;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008988 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008989 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00008990 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008991 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008992 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00008993 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008994
8995 /*
8996 * The Misc part of the Prolog
8997 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008998 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008999 xmlParseMisc(ctxt);
9000
9001 /*
9002 * Then possibly doc type declaration(s) and more Misc
9003 * (doctypedecl Misc*)?
9004 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009005 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00009006 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009007 (NXT(2) == 'D') && (NXT(3) == 'O') &&
9008 (NXT(4) == 'C') && (NXT(5) == 'T') &&
9009 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
9010 (NXT(8) == 'E')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009011
9012 ctxt->inSubset = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009013 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009014 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009015 ctxt->instate = XML_PARSER_DTD;
9016 xmlParseInternalSubset(ctxt);
9017 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009018
9019 /*
9020 * Create and update the external subset.
9021 */
9022 ctxt->inSubset = 2;
9023 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9024 (!ctxt->disableSAX))
9025 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9026 ctxt->extSubSystem, ctxt->extSubURI);
9027 ctxt->inSubset = 0;
9028
9029
Daniel Veillardb05deb71999-08-10 19:04:08 +00009030 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009031 xmlParseMisc(ctxt);
9032 }
9033
9034 /*
9035 * Time to start parsing the tree itself
9036 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009037 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00009038 if (RAW != '<') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009040 ctxt->sax->error(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00009041 "Start tag expected, '<' not found\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009042 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009043 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009044 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009045 ctxt->instate = XML_PARSER_EOF;
9046 } else {
9047 ctxt->instate = XML_PARSER_CONTENT;
9048 xmlParseElement(ctxt);
9049 ctxt->instate = XML_PARSER_EPILOG;
9050
9051
9052 /*
9053 * The Misc part at the end
9054 */
9055 xmlParseMisc(ctxt);
9056
Daniel Veillardcf461992000-03-14 18:30:20 +00009057 if (RAW != 0) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9059 ctxt->sax->error(ctxt->userData,
9060 "Extra content at the end of the document\n");
9061 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009062 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009063 ctxt->errNo = XML_ERR_DOCUMENT_END;
9064 }
9065 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009066 }
9067
Daniel Veillard260a68f1998-08-13 03:39:55 +00009068 /*
9069 * SAX: end of the document processing.
9070 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009071 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9072 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00009073 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillardcf461992000-03-14 18:30:20 +00009074
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009075 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009076 return(0);
9077}
9078
Daniel Veillardb05deb71999-08-10 19:04:08 +00009079/************************************************************************
9080 * *
Daniel Veillard7f858501999-11-17 17:32:38 +00009081 * Progressive parsing interfaces *
9082 * *
9083 ************************************************************************/
9084
9085/**
9086 * xmlParseLookupSequence:
9087 * @ctxt: an XML parser context
9088 * @first: the first char to lookup
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009089 * @next: the next char to lookup or zero
9090 * @third: the next char to lookup or zero
Daniel Veillard7f858501999-11-17 17:32:38 +00009091 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009092 * Try to find if a sequence (first, next, third) or just (first next) or
9093 * (first) is available in the input stream.
9094 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9095 * to avoid rescanning sequences of bytes, it DOES change the state of the
9096 * parser, do not use liberally.
Daniel Veillard7f858501999-11-17 17:32:38 +00009097 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009098 * Returns the index to the current parsing point if the full sequence
9099 * is available, -1 otherwise.
Daniel Veillard7f858501999-11-17 17:32:38 +00009100 */
9101int
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009102xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9103 xmlChar next, xmlChar third) {
9104 int base, len;
9105 xmlParserInputPtr in;
9106 const xmlChar *buf;
9107
9108 in = ctxt->input;
9109 if (in == NULL) return(-1);
9110 base = in->cur - in->base;
9111 if (base < 0) return(-1);
9112 if (ctxt->checkIndex > base)
9113 base = ctxt->checkIndex;
9114 if (in->buf == NULL) {
9115 buf = in->base;
9116 len = in->length;
9117 } else {
9118 buf = in->buf->buffer->content;
9119 len = in->buf->buffer->use;
9120 }
9121 /* take into account the sequence length */
9122 if (third) len -= 2;
9123 else if (next) len --;
9124 for (;base < len;base++) {
9125 if (buf[base] == first) {
9126 if (third != 0) {
9127 if ((buf[base + 1] != next) ||
9128 (buf[base + 2] != third)) continue;
9129 } else if (next != 0) {
9130 if (buf[base + 1] != next) continue;
9131 }
9132 ctxt->checkIndex = 0;
9133#ifdef DEBUG_PUSH
9134 if (next == 0)
9135 fprintf(stderr, "PP: lookup '%c' found at %d\n",
9136 first, base);
9137 else if (third == 0)
9138 fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
9139 first, next, base);
9140 else
9141 fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
9142 first, next, third, base);
9143#endif
9144 return(base - (in->cur - in->base));
9145 }
9146 }
9147 ctxt->checkIndex = base;
9148#ifdef DEBUG_PUSH
9149 if (next == 0)
9150 fprintf(stderr, "PP: lookup '%c' failed\n", first);
9151 else if (third == 0)
9152 fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
9153 else
9154 fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
9155#endif
9156 return(-1);
Daniel Veillard7f858501999-11-17 17:32:38 +00009157}
9158
9159/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00009160 * xmlParseTryOrFinish:
Daniel Veillard7f858501999-11-17 17:32:38 +00009161 * @ctxt: an XML parser context
Daniel Veillard71b656e2000-01-05 14:46:17 +00009162 * @terminate: last chunk indicator
Daniel Veillard7f858501999-11-17 17:32:38 +00009163 *
9164 * Try to progress on parsing
9165 *
9166 * Returns zero if no parsing was possible
9167 */
9168int
Daniel Veillard71b656e2000-01-05 14:46:17 +00009169xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
Daniel Veillard7f858501999-11-17 17:32:38 +00009170 int ret = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009171 int avail;
9172 xmlChar cur, next;
9173
9174#ifdef DEBUG_PUSH
9175 switch (ctxt->instate) {
9176 case XML_PARSER_EOF:
9177 fprintf(stderr, "PP: try EOF\n"); break;
9178 case XML_PARSER_START:
9179 fprintf(stderr, "PP: try START\n"); break;
9180 case XML_PARSER_MISC:
9181 fprintf(stderr, "PP: try MISC\n");break;
9182 case XML_PARSER_COMMENT:
9183 fprintf(stderr, "PP: try COMMENT\n");break;
9184 case XML_PARSER_PROLOG:
9185 fprintf(stderr, "PP: try PROLOG\n");break;
9186 case XML_PARSER_START_TAG:
9187 fprintf(stderr, "PP: try START_TAG\n");break;
9188 case XML_PARSER_CONTENT:
9189 fprintf(stderr, "PP: try CONTENT\n");break;
9190 case XML_PARSER_CDATA_SECTION:
9191 fprintf(stderr, "PP: try CDATA_SECTION\n");break;
9192 case XML_PARSER_END_TAG:
9193 fprintf(stderr, "PP: try END_TAG\n");break;
9194 case XML_PARSER_ENTITY_DECL:
9195 fprintf(stderr, "PP: try ENTITY_DECL\n");break;
9196 case XML_PARSER_ENTITY_VALUE:
9197 fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
9198 case XML_PARSER_ATTRIBUTE_VALUE:
9199 fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
9200 case XML_PARSER_DTD:
9201 fprintf(stderr, "PP: try DTD\n");break;
9202 case XML_PARSER_EPILOG:
9203 fprintf(stderr, "PP: try EPILOG\n");break;
9204 case XML_PARSER_PI:
9205 fprintf(stderr, "PP: try PI\n");break;
9206 }
9207#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00009208
9209 while (1) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009210 /*
9211 * Pop-up of finished entities.
9212 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009213 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009214 xmlPopInput(ctxt);
9215
Daniel Veillardcf461992000-03-14 18:30:20 +00009216 if (ctxt->input ==NULL) break;
9217 if (ctxt->input->buf == NULL)
9218 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009219 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009220 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009221 if (avail < 1)
9222 goto done;
Daniel Veillard7f858501999-11-17 17:32:38 +00009223 switch (ctxt->instate) {
9224 case XML_PARSER_EOF:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009225 /*
9226 * Document parsing is done !
9227 */
9228 goto done;
9229 case XML_PARSER_START:
9230 /*
9231 * Very first chars read from the document flow.
9232 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009233 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009234 if (IS_BLANK(cur)) {
9235 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9236 ctxt->sax->setDocumentLocator(ctxt->userData,
9237 &xmlDefaultSAXLocator);
9238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9239 ctxt->sax->error(ctxt->userData,
9240 "Extra spaces at the beginning of the document are not allowed\n");
9241 ctxt->errNo = XML_ERR_DOCUMENT_START;
9242 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009243 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009244 SKIP_BLANKS;
9245 ret++;
Daniel Veillardcf461992000-03-14 18:30:20 +00009246 if (ctxt->input->buf == NULL)
9247 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009248 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009249 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009250 }
9251 if (avail < 2)
9252 goto done;
9253
Daniel Veillardcf461992000-03-14 18:30:20 +00009254 cur = ctxt->input->cur[0];
9255 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009256 if (cur == 0) {
9257 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9258 ctxt->sax->setDocumentLocator(ctxt->userData,
9259 &xmlDefaultSAXLocator);
9260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9261 ctxt->sax->error(ctxt->userData, "Document is empty\n");
9262 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9263 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009264 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009265 ctxt->instate = XML_PARSER_EOF;
9266#ifdef DEBUG_PUSH
9267 fprintf(stderr, "PP: entering EOF\n");
9268#endif
9269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9270 ctxt->sax->endDocument(ctxt->userData);
9271 goto done;
9272 }
9273 if ((cur == '<') && (next == '?')) {
9274 /* PI or XML decl */
9275 if (avail < 5) return(ret);
Daniel Veillard71b656e2000-01-05 14:46:17 +00009276 if ((!terminate) &&
9277 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009278 return(ret);
9279 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9280 ctxt->sax->setDocumentLocator(ctxt->userData,
9281 &xmlDefaultSAXLocator);
Daniel Veillardcf461992000-03-14 18:30:20 +00009282 if ((ctxt->input->cur[2] == 'x') &&
9283 (ctxt->input->cur[3] == 'm') &&
9284 (ctxt->input->cur[4] == 'l') &&
9285 (IS_BLANK(ctxt->input->cur[5]))) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009286 ret += 5;
9287#ifdef DEBUG_PUSH
9288 fprintf(stderr, "PP: Parsing XML Decl\n");
9289#endif
9290 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00009291 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9292 /*
9293 * The XML REC instructs us to stop parsing right
9294 * here
9295 */
9296 ctxt->instate = XML_PARSER_EOF;
9297 return(0);
9298 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009299 ctxt->standalone = ctxt->input->standalone;
9300 if ((ctxt->encoding == NULL) &&
9301 (ctxt->input->encoding != NULL))
9302 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9303 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9304 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009305 ctxt->sax->startDocument(ctxt->userData);
9306 ctxt->instate = XML_PARSER_MISC;
9307#ifdef DEBUG_PUSH
9308 fprintf(stderr, "PP: entering MISC\n");
9309#endif
9310 } else {
9311 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00009312 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9313 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009314 ctxt->sax->startDocument(ctxt->userData);
9315 ctxt->instate = XML_PARSER_MISC;
9316#ifdef DEBUG_PUSH
9317 fprintf(stderr, "PP: entering MISC\n");
9318#endif
9319 }
9320 } else {
9321 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9322 ctxt->sax->setDocumentLocator(ctxt->userData,
9323 &xmlDefaultSAXLocator);
9324 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00009325 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9326 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009327 ctxt->sax->startDocument(ctxt->userData);
9328 ctxt->instate = XML_PARSER_MISC;
9329#ifdef DEBUG_PUSH
9330 fprintf(stderr, "PP: entering MISC\n");
9331#endif
9332 }
9333 break;
9334 case XML_PARSER_MISC:
9335 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009336 if (ctxt->input->buf == NULL)
9337 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009338 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009339 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009340 if (avail < 2)
9341 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009342 cur = ctxt->input->cur[0];
9343 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009344 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009345 if ((!terminate) &&
9346 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009347 goto done;
9348#ifdef DEBUG_PUSH
9349 fprintf(stderr, "PP: Parsing PI\n");
9350#endif
9351 xmlParsePI(ctxt);
9352 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009353 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009354 if ((!terminate) &&
9355 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009356 goto done;
9357#ifdef DEBUG_PUSH
9358 fprintf(stderr, "PP: Parsing Comment\n");
9359#endif
9360 xmlParseComment(ctxt);
9361 ctxt->instate = XML_PARSER_MISC;
9362 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009363 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9364 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9365 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9366 (ctxt->input->cur[8] == 'E')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009367 if ((!terminate) &&
9368 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009369 goto done;
9370#ifdef DEBUG_PUSH
9371 fprintf(stderr, "PP: Parsing internal subset\n");
9372#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009373 ctxt->inSubset = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009374 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009375 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009376 ctxt->instate = XML_PARSER_DTD;
9377#ifdef DEBUG_PUSH
9378 fprintf(stderr, "PP: entering DTD\n");
9379#endif
9380 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00009381 /*
9382 * Create and update the external subset.
9383 */
9384 ctxt->inSubset = 2;
9385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9386 (ctxt->sax->externalSubset != NULL))
9387 ctxt->sax->externalSubset(ctxt->userData,
9388 ctxt->intSubName, ctxt->extSubSystem,
9389 ctxt->extSubURI);
9390 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009391 ctxt->instate = XML_PARSER_PROLOG;
9392#ifdef DEBUG_PUSH
9393 fprintf(stderr, "PP: entering PROLOG\n");
9394#endif
9395 }
9396 } else if ((cur == '<') && (next == '!') &&
9397 (avail < 9)) {
9398 goto done;
9399 } else {
9400 ctxt->instate = XML_PARSER_START_TAG;
9401#ifdef DEBUG_PUSH
9402 fprintf(stderr, "PP: entering START_TAG\n");
9403#endif
9404 }
9405 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00009406 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009407 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009408 if (ctxt->input->buf == NULL)
9409 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009410 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009411 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009412 if (avail < 2)
9413 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009414 cur = ctxt->input->cur[0];
9415 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009416 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009417 if ((!terminate) &&
9418 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009419 goto done;
9420#ifdef DEBUG_PUSH
9421 fprintf(stderr, "PP: Parsing PI\n");
9422#endif
9423 xmlParsePI(ctxt);
9424 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009425 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009426 if ((!terminate) &&
9427 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009428 goto done;
9429#ifdef DEBUG_PUSH
9430 fprintf(stderr, "PP: Parsing Comment\n");
9431#endif
9432 xmlParseComment(ctxt);
9433 ctxt->instate = XML_PARSER_PROLOG;
9434 } else if ((cur == '<') && (next == '!') &&
9435 (avail < 4)) {
9436 goto done;
9437 } else {
9438 ctxt->instate = XML_PARSER_START_TAG;
9439#ifdef DEBUG_PUSH
9440 fprintf(stderr, "PP: entering START_TAG\n");
9441#endif
9442 }
9443 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00009444 case XML_PARSER_EPILOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009445 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009446 if (ctxt->input->buf == NULL)
9447 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009448 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009449 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009450 if (avail < 2)
9451 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009452 cur = ctxt->input->cur[0];
9453 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009454 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009455 if ((!terminate) &&
9456 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009457 goto done;
9458#ifdef DEBUG_PUSH
9459 fprintf(stderr, "PP: Parsing PI\n");
9460#endif
9461 xmlParsePI(ctxt);
9462 ctxt->instate = XML_PARSER_EPILOG;
9463 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009464 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009465 if ((!terminate) &&
9466 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009467 goto done;
9468#ifdef DEBUG_PUSH
9469 fprintf(stderr, "PP: Parsing Comment\n");
9470#endif
9471 xmlParseComment(ctxt);
9472 ctxt->instate = XML_PARSER_EPILOG;
9473 } else if ((cur == '<') && (next == '!') &&
9474 (avail < 4)) {
9475 goto done;
9476 } else {
9477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9478 ctxt->sax->error(ctxt->userData,
9479 "Extra content at the end of the document\n");
9480 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009481 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009482 ctxt->errNo = XML_ERR_DOCUMENT_END;
9483 ctxt->instate = XML_PARSER_EOF;
9484#ifdef DEBUG_PUSH
9485 fprintf(stderr, "PP: entering EOF\n");
9486#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009487 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9488 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009489 ctxt->sax->endDocument(ctxt->userData);
9490 goto done;
9491 }
9492 break;
9493 case XML_PARSER_START_TAG: {
9494 xmlChar *name, *oldname;
9495
Daniel Veillardcf461992000-03-14 18:30:20 +00009496 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009497 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009498 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009499 if (cur != '<') {
9500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9501 ctxt->sax->error(ctxt->userData,
9502 "Start tag expect, '<' not found\n");
9503 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9504 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009505 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009506 ctxt->instate = XML_PARSER_EOF;
9507#ifdef DEBUG_PUSH
9508 fprintf(stderr, "PP: entering EOF\n");
9509#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009510 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9511 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009512 ctxt->sax->endDocument(ctxt->userData);
9513 goto done;
9514 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00009515 if ((!terminate) &&
9516 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009517 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009518 if (ctxt->spaceNr == 0)
9519 spacePush(ctxt, -1);
9520 else
9521 spacePush(ctxt, *ctxt->space);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009522 name = xmlParseStartTag(ctxt);
9523 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009524 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009525 ctxt->instate = XML_PARSER_EOF;
9526#ifdef DEBUG_PUSH
9527 fprintf(stderr, "PP: entering EOF\n");
9528#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009529 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9530 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009531 ctxt->sax->endDocument(ctxt->userData);
9532 goto done;
9533 }
9534 namePush(ctxt, xmlStrdup(name));
9535
9536 /*
9537 * [ VC: Root Element Type ]
9538 * The Name in the document type declaration must match
9539 * the element type of the root element.
9540 */
9541 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009542 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009543 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9544
9545 /*
9546 * Check for an Empty Element.
9547 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009548 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009549 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00009550 if ((ctxt->sax != NULL) &&
9551 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009552 ctxt->sax->endElement(ctxt->userData, name);
9553 xmlFree(name);
9554 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009555 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009556 if (oldname != NULL) {
9557#ifdef DEBUG_STACK
9558 fprintf(stderr,"Close: popped %s\n", oldname);
9559#endif
9560 xmlFree(oldname);
9561 }
9562 if (ctxt->name == NULL) {
9563 ctxt->instate = XML_PARSER_EPILOG;
9564#ifdef DEBUG_PUSH
9565 fprintf(stderr, "PP: entering EPILOG\n");
9566#endif
9567 } else {
9568 ctxt->instate = XML_PARSER_CONTENT;
9569#ifdef DEBUG_PUSH
9570 fprintf(stderr, "PP: entering CONTENT\n");
9571#endif
9572 }
9573 break;
9574 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009575 if (RAW == '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009576 NEXT;
9577 } else {
9578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9579 ctxt->sax->error(ctxt->userData,
9580 "Couldn't find end of Start Tag %s\n",
9581 name);
9582 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009583 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009584 ctxt->errNo = XML_ERR_GT_REQUIRED;
9585
9586 /*
9587 * end of parsing of this node.
9588 */
9589 nodePop(ctxt);
9590 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009591 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009592 if (oldname != NULL) {
9593#ifdef DEBUG_STACK
9594 fprintf(stderr,"Close: popped %s\n", oldname);
9595#endif
9596 xmlFree(oldname);
9597 }
9598 }
9599 xmlFree(name);
9600 ctxt->instate = XML_PARSER_CONTENT;
9601#ifdef DEBUG_PUSH
9602 fprintf(stderr, "PP: entering CONTENT\n");
9603#endif
9604 break;
9605 }
9606 case XML_PARSER_CONTENT:
9607 /*
9608 * Handle preparsed entities and charRef
9609 */
9610 if (ctxt->token != 0) {
9611 xmlChar cur[2] = { 0 , 0 } ;
9612
9613 cur[0] = (xmlChar) ctxt->token;
Daniel Veillardcf461992000-03-14 18:30:20 +00009614 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9615 (ctxt->sax->characters != NULL))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009616 ctxt->sax->characters(ctxt->userData, cur, 1);
9617 ctxt->token = 0;
9618 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009619 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009620 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009621 cur = ctxt->input->cur[0];
9622 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009623 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009624 if ((!terminate) &&
9625 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009626 goto done;
9627#ifdef DEBUG_PUSH
9628 fprintf(stderr, "PP: Parsing PI\n");
9629#endif
9630 xmlParsePI(ctxt);
9631 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009632 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009633 if ((!terminate) &&
9634 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009635 goto done;
9636#ifdef DEBUG_PUSH
9637 fprintf(stderr, "PP: Parsing Comment\n");
9638#endif
9639 xmlParseComment(ctxt);
9640 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009641 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9642 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9643 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9644 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9645 (ctxt->input->cur[8] == '[')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009646 SKIP(9);
9647 ctxt->instate = XML_PARSER_CDATA_SECTION;
9648#ifdef DEBUG_PUSH
9649 fprintf(stderr, "PP: entering CDATA_SECTION\n");
9650#endif
9651 break;
9652 } else if ((cur == '<') && (next == '!') &&
9653 (avail < 9)) {
9654 goto done;
9655 } else if ((cur == '<') && (next == '/')) {
9656 ctxt->instate = XML_PARSER_END_TAG;
9657#ifdef DEBUG_PUSH
9658 fprintf(stderr, "PP: entering END_TAG\n");
9659#endif
9660 break;
9661 } else if (cur == '<') {
9662 ctxt->instate = XML_PARSER_START_TAG;
9663#ifdef DEBUG_PUSH
9664 fprintf(stderr, "PP: entering START_TAG\n");
9665#endif
9666 break;
9667 } else if (cur == '&') {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009668 if ((!terminate) &&
9669 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009670 goto done;
9671#ifdef DEBUG_PUSH
9672 fprintf(stderr, "PP: Parsing Reference\n");
9673#endif
9674 /* TODO: check generation of subtrees if noent !!! */
9675 xmlParseReference(ctxt);
9676 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00009677 /* TODO Avoid the extra copy, handle directly !!! */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009678 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00009679 * Goal of the following test is:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009680 * - minimize calls to the SAX 'character' callback
9681 * when they are mergeable
9682 * - handle an problem for isBlank when we only parse
9683 * a sequence of blank chars and the next one is
9684 * not available to check against '<' presence.
9685 * - tries to homogenize the differences in SAX
9686 * callbacks beween the push and pull versions
9687 * of the parser.
9688 */
9689 if ((ctxt->inputNr == 1) &&
9690 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009691 if ((!terminate) &&
9692 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009693 goto done;
9694 }
9695 ctxt->checkIndex = 0;
9696#ifdef DEBUG_PUSH
9697 fprintf(stderr, "PP: Parsing char data\n");
9698#endif
9699 xmlParseCharData(ctxt, 0);
9700 }
9701 /*
9702 * Pop-up of finished entities.
9703 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009704 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009705 xmlPopInput(ctxt);
9706 break;
9707 case XML_PARSER_CDATA_SECTION: {
9708 /*
9709 * The Push mode need to have the SAX callback for
9710 * cdataBlock merge back contiguous callbacks.
9711 */
9712 int base;
9713
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009714 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9715 if (base < 0) {
9716 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009717 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009718 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00009719 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009720 XML_PARSER_BIG_BUFFER_SIZE);
9721 }
9722 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9723 ctxt->checkIndex = 0;
9724 }
9725 goto done;
9726 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00009727 if ((ctxt->sax != NULL) && (base > 0) &&
9728 (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009729 if (ctxt->sax->cdataBlock != NULL)
9730 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00009731 ctxt->input->cur, base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009732 }
9733 SKIP(base + 3);
9734 ctxt->checkIndex = 0;
9735 ctxt->instate = XML_PARSER_CONTENT;
9736#ifdef DEBUG_PUSH
9737 fprintf(stderr, "PP: entering CONTENT\n");
9738#endif
9739 }
9740 break;
9741 }
Daniel Veillard5e5c6231999-12-29 12:49:06 +00009742 case XML_PARSER_END_TAG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009743 if (avail < 2)
9744 goto done;
Daniel Veillard71b656e2000-01-05 14:46:17 +00009745 if ((!terminate) &&
9746 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009747 goto done;
9748 xmlParseEndTag(ctxt);
9749 if (ctxt->name == NULL) {
9750 ctxt->instate = XML_PARSER_EPILOG;
9751#ifdef DEBUG_PUSH
9752 fprintf(stderr, "PP: entering EPILOG\n");
9753#endif
9754 } else {
9755 ctxt->instate = XML_PARSER_CONTENT;
9756#ifdef DEBUG_PUSH
9757 fprintf(stderr, "PP: entering CONTENT\n");
9758#endif
9759 }
9760 break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009761 case XML_PARSER_DTD: {
9762 /*
9763 * Sorry but progressive parsing of the internal subset
9764 * is not expected to be supported. We first check that
9765 * the full content of the internal subset is available and
9766 * the parsing is launched only at that point.
9767 * Internal subset ends up with "']' S? '>'" in an unescaped
9768 * section and not in a ']]>' sequence which are conditional
9769 * sections (whoever argued to keep that crap in XML deserve
9770 * a place in hell !).
9771 */
9772 int base, i;
9773 xmlChar *buf;
9774 xmlChar quote = 0;
9775
Daniel Veillardcf461992000-03-14 18:30:20 +00009776 base = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009777 if (base < 0) return(0);
9778 if (ctxt->checkIndex > base)
9779 base = ctxt->checkIndex;
Daniel Veillardcf461992000-03-14 18:30:20 +00009780 buf = ctxt->input->buf->buffer->content;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00009781 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9782 base++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009783 if (quote != 0) {
9784 if (buf[base] == quote)
9785 quote = 0;
9786 continue;
9787 }
9788 if (buf[base] == '"') {
9789 quote = '"';
9790 continue;
9791 }
9792 if (buf[base] == '\'') {
9793 quote = '\'';
9794 continue;
9795 }
9796 if (buf[base] == ']') {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00009797 if ((unsigned int) base +1 >=
9798 ctxt->input->buf->buffer->use)
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009799 break;
9800 if (buf[base + 1] == ']') {
9801 /* conditional crap, skip both ']' ! */
9802 base++;
9803 continue;
9804 }
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00009805 for (i = 0;
9806 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9807 i++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009808 if (buf[base + i] == '>')
9809 goto found_end_int_subset;
9810 }
9811 break;
9812 }
9813 }
9814 /*
9815 * We didn't found the end of the Internal subset
9816 */
9817 if (quote == 0)
9818 ctxt->checkIndex = base;
9819#ifdef DEBUG_PUSH
9820 if (next == 0)
9821 fprintf(stderr, "PP: lookup of int subset end filed\n");
9822#endif
9823 goto done;
9824
9825found_end_int_subset:
9826 xmlParseInternalSubset(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009827 ctxt->inSubset = 2;
9828 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9829 (ctxt->sax->externalSubset != NULL))
9830 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9831 ctxt->extSubSystem, ctxt->extSubURI);
9832 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009833 ctxt->instate = XML_PARSER_PROLOG;
9834 ctxt->checkIndex = 0;
9835#ifdef DEBUG_PUSH
9836 fprintf(stderr, "PP: entering PROLOG\n");
9837#endif
9838 break;
9839 }
Daniel Veillard7f858501999-11-17 17:32:38 +00009840 case XML_PARSER_COMMENT:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009841 fprintf(stderr, "PP: internal error, state == COMMENT\n");
9842 ctxt->instate = XML_PARSER_CONTENT;
9843#ifdef DEBUG_PUSH
9844 fprintf(stderr, "PP: entering CONTENT\n");
9845#endif
9846 break;
9847 case XML_PARSER_PI:
9848 fprintf(stderr, "PP: internal error, state == PI\n");
9849 ctxt->instate = XML_PARSER_CONTENT;
9850#ifdef DEBUG_PUSH
9851 fprintf(stderr, "PP: entering CONTENT\n");
9852#endif
9853 break;
9854 case XML_PARSER_ENTITY_DECL:
9855 fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9856 ctxt->instate = XML_PARSER_DTD;
9857#ifdef DEBUG_PUSH
9858 fprintf(stderr, "PP: entering DTD\n");
9859#endif
9860 break;
9861 case XML_PARSER_ENTITY_VALUE:
9862 fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9863 ctxt->instate = XML_PARSER_CONTENT;
9864#ifdef DEBUG_PUSH
9865 fprintf(stderr, "PP: entering DTD\n");
9866#endif
9867 break;
9868 case XML_PARSER_ATTRIBUTE_VALUE:
9869 fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
9870 ctxt->instate = XML_PARSER_START_TAG;
9871#ifdef DEBUG_PUSH
9872 fprintf(stderr, "PP: entering START_TAG\n");
9873#endif
9874 break;
Daniel Veillardcf461992000-03-14 18:30:20 +00009875 case XML_PARSER_SYSTEM_LITERAL:
9876 fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
9877 ctxt->instate = XML_PARSER_START_TAG;
9878#ifdef DEBUG_PUSH
9879 fprintf(stderr, "PP: entering START_TAG\n");
9880#endif
9881 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00009882 }
9883 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009884done:
9885#ifdef DEBUG_PUSH
9886 fprintf(stderr, "PP: done %d\n", ret);
9887#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00009888 return(ret);
9889}
9890
9891/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00009892 * xmlParseTry:
9893 * @ctxt: an XML parser context
9894 *
9895 * Try to progress on parsing
9896 *
9897 * Returns zero if no parsing was possible
9898 */
9899int
9900xmlParseTry(xmlParserCtxtPtr ctxt) {
9901 return(xmlParseTryOrFinish(ctxt, 0));
9902}
9903
9904/**
Daniel Veillard7f858501999-11-17 17:32:38 +00009905 * xmlParseChunk:
9906 * @ctxt: an XML parser context
9907 * @chunk: an char array
9908 * @size: the size in byte of the chunk
9909 * @terminate: last chunk indicator
9910 *
9911 * Parse a Chunk of memory
9912 *
9913 * Returns zero if no error, the xmlParserErrors otherwise.
9914 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009915int
Daniel Veillard7f858501999-11-17 17:32:38 +00009916xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9917 int terminate) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00009918 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009919 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9920 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9921 int cur = ctxt->input->cur - ctxt->input->base;
9922
Daniel Veillarda819dac1999-11-24 18:04:22 +00009923 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009924 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9925 ctxt->input->cur = ctxt->input->base + cur;
9926#ifdef DEBUG_PUSH
9927 fprintf(stderr, "PP: pushed %d\n", size);
9928#endif
9929
Daniel Veillardd0f7f742000-02-02 17:42:48 +00009930 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9931 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009932 } else if (ctxt->instate != XML_PARSER_EOF)
Daniel Veillard71b656e2000-01-05 14:46:17 +00009933 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009934 if (terminate) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009935 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00009936 * Check for termination
9937 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009938 if ((ctxt->instate != XML_PARSER_EOF) &&
9939 (ctxt->instate != XML_PARSER_EPILOG)) {
9940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9941 ctxt->sax->error(ctxt->userData,
9942 "Extra content at the end of the document\n");
9943 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009944 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009945 ctxt->errNo = XML_ERR_DOCUMENT_END;
9946 }
9947 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009948 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9949 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009950 ctxt->sax->endDocument(ctxt->userData);
9951 }
9952 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard7f858501999-11-17 17:32:38 +00009953 }
9954 return((xmlParserErrors) ctxt->errNo);
9955}
9956
9957/************************************************************************
9958 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00009959 * I/O front end functions to the parser *
9960 * *
9961 ************************************************************************/
9962
Daniel Veillard11e00581998-10-24 18:27:49 +00009963/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009964 * xmlCreatePushParserCtxt:
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00009965 * @ctxt: an XML parser context
9966 *
9967 * Blocks further parser processing
9968 */
9969void
9970xmlStopParser(xmlParserCtxtPtr ctxt) {
9971 ctxt->instate = XML_PARSER_EOF;
9972 if (ctxt->input != NULL)
9973 ctxt->input->cur = BAD_CAST"";
9974}
9975
9976/**
9977 * xmlCreatePushParserCtxt:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009978 * @sax: a SAX handler
9979 * @user_data: The user data returned on SAX callbacks
9980 * @chunk: a pointer to an array of chars
9981 * @size: number of chars in the array
9982 * @filename: an optional file name or URI
9983 *
9984 * Create a parser context for using the XML parser in push mode
9985 * To allow content encoding detection, @size should be >= 4
9986 * The value of @filename is used for fetching external entities
9987 * and error/warning reports.
9988 *
9989 * Returns the new parser context or NULL
9990 */
9991xmlParserCtxtPtr
9992xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9993 const char *chunk, int size, const char *filename) {
9994 xmlParserCtxtPtr ctxt;
9995 xmlParserInputPtr inputStream;
9996 xmlParserInputBufferPtr buf;
9997 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9998
9999 /*
Daniel Veillardcf461992000-03-14 18:30:20 +000010000 * plug some encoding conversion routines
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010001 */
10002 if ((chunk != NULL) && (size >= 4))
Daniel Veillardcf461992000-03-14 18:30:20 +000010003 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010004
10005 buf = xmlAllocParserInputBuffer(enc);
10006 if (buf == NULL) return(NULL);
10007
10008 ctxt = xmlNewParserCtxt();
10009 if (ctxt == NULL) {
10010 xmlFree(buf);
10011 return(NULL);
10012 }
10013 if (sax != NULL) {
10014 if (ctxt->sax != &xmlDefaultSAXHandler)
10015 xmlFree(ctxt->sax);
10016 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10017 if (ctxt->sax == NULL) {
10018 xmlFree(buf);
10019 xmlFree(ctxt);
10020 return(NULL);
10021 }
10022 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10023 if (user_data != NULL)
10024 ctxt->userData = user_data;
10025 }
10026 if (filename == NULL) {
10027 ctxt->directory = NULL;
10028 } else {
10029 ctxt->directory = xmlParserGetDirectory(filename);
10030 }
10031
10032 inputStream = xmlNewInputStream(ctxt);
10033 if (inputStream == NULL) {
10034 xmlFreeParserCtxt(ctxt);
10035 return(NULL);
10036 }
10037
10038 if (filename == NULL)
10039 inputStream->filename = NULL;
10040 else
10041 inputStream->filename = xmlMemStrdup(filename);
10042 inputStream->buf = buf;
10043 inputStream->base = inputStream->buf->buffer->content;
10044 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +000010045 if (enc != XML_CHAR_ENCODING_NONE) {
10046 xmlSwitchEncoding(ctxt, enc);
10047 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010048
10049 inputPush(ctxt, inputStream);
10050
10051 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10052 (ctxt->input->buf != NULL)) {
10053 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10054#ifdef DEBUG_PUSH
10055 fprintf(stderr, "PP: pushed %d\n", size);
10056#endif
10057 }
10058
10059 return(ctxt);
10060}
10061
10062/**
Daniel Veillard5e873c42000-04-12 13:27:38 +000010063 * xmlCreateIOParserCtxt:
10064 * @sax: a SAX handler
10065 * @user_data: The user data returned on SAX callbacks
10066 * @ioread: an I/O read function
10067 * @ioclose: an I/O close function
10068 * @ioctx: an I/O handler
10069 * @enc: the charset encoding if known
10070 *
10071 * Create a parser context for using the XML parser with an existing
10072 * I/O stream
10073 *
10074 * Returns the new parser context or NULL
10075 */
10076xmlParserCtxtPtr
10077xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10078 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10079 void *ioctx, xmlCharEncoding enc) {
10080 xmlParserCtxtPtr ctxt;
10081 xmlParserInputPtr inputStream;
10082 xmlParserInputBufferPtr buf;
10083
10084 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10085 if (buf == NULL) return(NULL);
10086
10087 ctxt = xmlNewParserCtxt();
10088 if (ctxt == NULL) {
10089 xmlFree(buf);
10090 return(NULL);
10091 }
10092 if (sax != NULL) {
10093 if (ctxt->sax != &xmlDefaultSAXHandler)
10094 xmlFree(ctxt->sax);
10095 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10096 if (ctxt->sax == NULL) {
10097 xmlFree(buf);
10098 xmlFree(ctxt);
10099 return(NULL);
10100 }
10101 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10102 if (user_data != NULL)
10103 ctxt->userData = user_data;
10104 }
10105
10106 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10107 if (inputStream == NULL) {
10108 xmlFreeParserCtxt(ctxt);
10109 return(NULL);
10110 }
10111 inputPush(ctxt, inputStream);
10112
10113 return(ctxt);
10114}
10115
10116/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010117 * xmlCreateDocParserCtxt:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010118 * @cur: a pointer to an array of xmlChar
Daniel Veillardd692aa41999-02-28 21:54:31 +000010119 *
Daniel Veillard06047432000-04-24 11:33:38 +000010120 * Creates a parser context for an XML in-memory document.
Daniel Veillardd692aa41999-02-28 21:54:31 +000010121 *
10122 * Returns the new parser context or NULL
10123 */
10124xmlParserCtxtPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010125xmlCreateDocParserCtxt(xmlChar *cur) {
Daniel Veillardd692aa41999-02-28 21:54:31 +000010126 xmlParserCtxtPtr ctxt;
10127 xmlParserInputPtr input;
10128
Daniel Veillardb05deb71999-08-10 19:04:08 +000010129 ctxt = xmlNewParserCtxt();
Daniel Veillardd692aa41999-02-28 21:54:31 +000010130 if (ctxt == NULL) {
Daniel Veillardd692aa41999-02-28 21:54:31 +000010131 return(NULL);
10132 }
Daniel Veillardb05deb71999-08-10 19:04:08 +000010133 input = xmlNewInputStream(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +000010134 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000010135 xmlFreeParserCtxt(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +000010136 return(NULL);
10137 }
10138
Daniel Veillardd692aa41999-02-28 21:54:31 +000010139 input->base = cur;
10140 input->cur = cur;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010141
10142 inputPush(ctxt, input);
10143 return(ctxt);
10144}
10145
10146/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010147 * xmlSAXParseDoc:
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010148 * @sax: the SAX handler block
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010149 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010150 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10151 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +000010152 *
10153 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010154 * It use the given SAX function block to handle the parsing callback.
10155 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +000010156 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010157 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +000010158 */
10159
Daniel Veillard1e346af1999-02-22 10:33:01 +000010160xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010161xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010162 xmlDocPtr ret;
10163 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010164
10165 if (cur == NULL) return(NULL);
10166
Daniel Veillardd692aa41999-02-28 21:54:31 +000010167
10168 ctxt = xmlCreateDocParserCtxt(cur);
10169 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000010170 if (sax != NULL) {
10171 ctxt->sax = sax;
10172 ctxt->userData = NULL;
10173 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000010174
10175 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +000010176 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010177 else {
10178 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000010179 xmlFreeDoc(ctxt->myDoc);
10180 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010181 }
Daniel Veillard97fea181999-06-26 23:07:37 +000010182 if (sax != NULL)
10183 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010184 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010185
10186 return(ret);
10187}
10188
Daniel Veillard11e00581998-10-24 18:27:49 +000010189/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010190 * xmlParseDoc:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010191 * @cur: a pointer to an array of xmlChar
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010192 *
10193 * parse an XML in-memory document and build a tree.
10194 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010195 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010196 */
10197
Daniel Veillard1e346af1999-02-22 10:33:01 +000010198xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010199xmlParseDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010200 return(xmlSAXParseDoc(NULL, cur, 0));
10201}
10202
10203/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010204 * xmlSAXParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +000010205 * @sax: the SAX handler block
10206 * @ExternalID: a NAME* containing the External ID of the DTD
10207 * @SystemID: a NAME* containing the URL to the DTD
10208 *
10209 * Load and parse an external subset.
10210 *
10211 * Returns the resulting xmlDtdPtr or NULL in case of error.
10212 */
10213
10214xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010215xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10216 const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +000010217 xmlDtdPtr ret = NULL;
10218 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +000010219 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010220 xmlCharEncoding enc;
10221
10222 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10223
Daniel Veillardb05deb71999-08-10 19:04:08 +000010224 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +000010225 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +000010226 return(NULL);
10227 }
Daniel Veillard011b63c1999-06-02 17:44:04 +000010228
10229 /*
10230 * Set-up the SAX context
10231 */
Daniel Veillard011b63c1999-06-02 17:44:04 +000010232 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010233 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000010234 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010235 ctxt->sax = sax;
10236 ctxt->userData = NULL;
10237 }
10238
10239 /*
10240 * Ask the Entity resolver to load the damn thing
10241 */
10242
10243 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
10244 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
10245 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +000010246 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010247 xmlFreeParserCtxt(ctxt);
10248 return(NULL);
10249 }
10250
10251 /*
Daniel Veillardcf461992000-03-14 18:30:20 +000010252 * plug some encoding conversion routines here.
Daniel Veillard011b63c1999-06-02 17:44:04 +000010253 */
10254 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +000010255 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010256 xmlSwitchEncoding(ctxt, enc);
10257
Daniel Veillardb05deb71999-08-10 19:04:08 +000010258 if (input->filename == NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +000010259 input->filename = (char *) xmlStrdup(SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010260 input->line = 1;
10261 input->col = 1;
10262 input->base = ctxt->input->cur;
10263 input->cur = ctxt->input->cur;
10264 input->free = NULL;
10265
10266 /*
10267 * let's parse that entity knowing it's an external subset.
10268 */
Daniel Veillard06047432000-04-24 11:33:38 +000010269 ctxt->inSubset = 2;
10270 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10271 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10272 ExternalID, SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010273 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10274
10275 if (ctxt->myDoc != NULL) {
10276 if (ctxt->wellFormed) {
Daniel Veillard06047432000-04-24 11:33:38 +000010277 ret = ctxt->myDoc->extSubset;
10278 ctxt->myDoc->extSubset = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010279 } else {
10280 ret = NULL;
10281 }
10282 xmlFreeDoc(ctxt->myDoc);
10283 ctxt->myDoc = NULL;
10284 }
Daniel Veillard97fea181999-06-26 23:07:37 +000010285 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010286 xmlFreeParserCtxt(ctxt);
10287
10288 return(ret);
10289}
10290
10291/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010292 * xmlParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +000010293 * @ExternalID: a NAME* containing the External ID of the DTD
10294 * @SystemID: a NAME* containing the URL to the DTD
10295 *
10296 * Load and parse an external subset.
10297 *
10298 * Returns the resulting xmlDtdPtr or NULL in case of error.
10299 */
10300
10301xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010302xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +000010303 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10304}
10305
10306/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010307 * xmlSAXParseBalancedChunk:
Daniel Veillard0142b842000-01-14 14:45:24 +000010308 * @ctx: an XML parser context (possibly NULL)
10309 * @sax: the SAX handler bloc (possibly NULL)
10310 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10311 * @input: a parser input stream
10312 * @enc: the encoding
10313 *
10314 * Parse a well-balanced chunk of an XML document
10315 * The user has to provide SAX callback block whose routines will be
10316 * called by the parser
10317 * The allowed sequence for the Well Balanced Chunk is the one defined by
10318 * the content production in the XML grammar:
10319 *
10320 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10321 *
Daniel Veillardcf461992000-03-14 18:30:20 +000010322 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
Daniel Veillard0142b842000-01-14 14:45:24 +000010323 * the error code otherwise
10324 */
10325
10326int
10327xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10328 void *user_data, xmlParserInputPtr input,
10329 xmlCharEncoding enc) {
10330 xmlParserCtxtPtr ctxt;
10331 int ret;
10332
10333 if (input == NULL) return(-1);
10334
10335 if (ctx != NULL)
10336 ctxt = ctx;
10337 else {
10338 ctxt = xmlNewParserCtxt();
10339 if (ctxt == NULL)
10340 return(-1);
10341 if (sax == NULL)
10342 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10343 }
10344
10345 /*
10346 * Set-up the SAX context
10347 */
10348 if (sax != NULL) {
10349 if (ctxt->sax != NULL)
10350 xmlFree(ctxt->sax);
10351 ctxt->sax = sax;
10352 ctxt->userData = user_data;
10353 }
10354
10355 /*
10356 * plug some encoding conversion routines here.
10357 */
10358 xmlPushInput(ctxt, input);
10359 if (enc != XML_CHAR_ENCODING_NONE)
10360 xmlSwitchEncoding(ctxt, enc);
10361
10362 /*
10363 * let's parse that entity knowing it's an external subset.
10364 */
10365 xmlParseContent(ctxt);
10366 ret = ctxt->errNo;
10367
10368 if (ctx == NULL) {
10369 if (sax != NULL)
10370 ctxt->sax = NULL;
10371 else
10372 xmlFreeDoc(ctxt->myDoc);
10373 xmlFreeParserCtxt(ctxt);
10374 }
10375 return(ret);
10376}
10377
10378/**
Daniel Veillard87b95392000-08-12 21:12:04 +000010379 * xmlParseCtxtExternalEntity:
10380 * @ctx: the existing parsing context
10381 * @URL: the URL for the entity to load
10382 * @ID: the System ID for the entity to load
10383 * @list: the return value for the set of parsed nodes
10384 *
10385 * Parse an external general entity within an existing parsing context
10386 * An external general parsed entity is well-formed if it matches the
10387 * production labeled extParsedEnt.
10388 *
10389 * [78] extParsedEnt ::= TextDecl? content
10390 *
10391 * Returns 0 if the entity is well formed, -1 in case of args problem and
10392 * the parser error code otherwise
10393 */
10394
10395int
10396xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
10397 const xmlChar *ID, xmlNodePtr *list) {
10398 xmlParserCtxtPtr ctxt;
10399 xmlDocPtr newDoc;
10400 xmlSAXHandlerPtr oldsax = NULL;
10401 int ret = 0;
10402
10403 if (ctx->depth > 40) {
10404 return(XML_ERR_ENTITY_LOOP);
10405 }
10406
10407 if (list != NULL)
10408 *list = NULL;
10409 if ((URL == NULL) && (ID == NULL))
10410 return(-1);
10411 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10412 return(-1);
10413
10414
10415 ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
10416 if (ctxt == NULL) return(-1);
10417 ctxt->userData = ctxt;
10418 oldsax = ctxt->sax;
10419 ctxt->sax = ctx->sax;
10420 newDoc = xmlNewDoc(BAD_CAST "1.0");
10421 if (newDoc == NULL) {
10422 xmlFreeParserCtxt(ctxt);
10423 return(-1);
10424 }
10425 if (ctx->myDoc != NULL) {
10426 newDoc->intSubset = ctx->myDoc->intSubset;
10427 newDoc->extSubset = ctx->myDoc->extSubset;
10428 }
10429 if (ctx->myDoc->URL != NULL) {
10430 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10431 }
10432 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10433 if (newDoc->children == NULL) {
10434 ctxt->sax = oldsax;
10435 xmlFreeParserCtxt(ctxt);
10436 newDoc->intSubset = NULL;
10437 newDoc->extSubset = NULL;
10438 xmlFreeDoc(newDoc);
10439 return(-1);
10440 }
10441 nodePush(ctxt, newDoc->children);
10442 if (ctx->myDoc == NULL) {
10443 ctxt->myDoc = newDoc;
10444 } else {
10445 ctxt->myDoc = ctx->myDoc;
10446 newDoc->children->doc = ctx->myDoc;
10447 }
10448
10449 /*
10450 * Parse a possible text declaration first
10451 */
10452 GROW;
10453 if ((RAW == '<') && (NXT(1) == '?') &&
10454 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10455 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10456 xmlParseTextDecl(ctxt);
10457 }
10458
10459 /*
10460 * Doing validity checking on chunk doesn't make sense
10461 */
10462 ctxt->instate = XML_PARSER_CONTENT;
10463 ctxt->validate = ctx->validate;
10464 ctxt->depth = ctx->depth + 1;
10465 ctxt->replaceEntities = ctx->replaceEntities;
10466 if (ctxt->validate) {
10467 ctxt->vctxt.error = ctx->vctxt.error;
10468 ctxt->vctxt.warning = ctx->vctxt.warning;
10469 /* Allocate the Node stack */
10470 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
10471 ctxt->vctxt.nodeNr = 0;
10472 ctxt->vctxt.nodeMax = 4;
10473 ctxt->vctxt.node = NULL;
10474 } else {
10475 ctxt->vctxt.error = NULL;
10476 ctxt->vctxt.warning = NULL;
10477 }
10478
10479 xmlParseContent(ctxt);
10480
10481 if ((RAW == '<') && (NXT(1) == '/')) {
10482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10483 ctxt->sax->error(ctxt->userData,
10484 "chunk is not well balanced\n");
10485 ctxt->wellFormed = 0;
10486 ctxt->disableSAX = 1;
10487 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10488 } else if (RAW != 0) {
10489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10490 ctxt->sax->error(ctxt->userData,
10491 "extra content at the end of well balanced chunk\n");
10492 ctxt->wellFormed = 0;
10493 ctxt->disableSAX = 1;
10494 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10495 }
10496 if (ctxt->node != newDoc->children) {
10497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10498 ctxt->sax->error(ctxt->userData,
10499 "chunk is not well balanced\n");
10500 ctxt->wellFormed = 0;
10501 ctxt->disableSAX = 1;
10502 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10503 }
10504
10505 if (!ctxt->wellFormed) {
10506 if (ctxt->errNo == 0)
10507 ret = 1;
10508 else
10509 ret = ctxt->errNo;
10510 } else {
10511 if (list != NULL) {
10512 xmlNodePtr cur;
10513
10514 /*
10515 * Return the newly created nodeset after unlinking it from
10516 * they pseudo parent.
10517 */
10518 cur = newDoc->children->children;
10519 *list = cur;
10520 while (cur != NULL) {
10521 cur->parent = NULL;
10522 cur = cur->next;
10523 }
10524 newDoc->children->children = NULL;
10525 }
10526 ret = 0;
10527 }
10528 ctxt->sax = oldsax;
10529 xmlFreeParserCtxt(ctxt);
10530 newDoc->intSubset = NULL;
10531 newDoc->extSubset = NULL;
10532 xmlFreeDoc(newDoc);
10533
10534 return(ret);
10535}
10536
10537/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010538 * xmlParseExternalEntity:
Daniel Veillard0142b842000-01-14 14:45:24 +000010539 * @doc: the document the chunk pertains to
Daniel Veillardcf461992000-03-14 18:30:20 +000010540 * @sax: the SAX handler bloc (possibly NULL)
10541 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10542 * @depth: Used for loop detection, use 0
10543 * @URL: the URL for the entity to load
10544 * @ID: the System ID for the entity to load
10545 * @list: the return value for the set of parsed nodes
Daniel Veillard0142b842000-01-14 14:45:24 +000010546 *
Daniel Veillardcf461992000-03-14 18:30:20 +000010547 * Parse an external general entity
10548 * An external general parsed entity is well-formed if it matches the
10549 * production labeled extParsedEnt.
10550 *
10551 * [78] extParsedEnt ::= TextDecl? content
10552 *
10553 * Returns 0 if the entity is well formed, -1 in case of args problem and
10554 * the parser error code otherwise
Daniel Veillard0142b842000-01-14 14:45:24 +000010555 */
10556
Daniel Veillardcf461992000-03-14 18:30:20 +000010557int
10558xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
10559 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
10560 xmlParserCtxtPtr ctxt;
10561 xmlDocPtr newDoc;
10562 xmlSAXHandlerPtr oldsax = NULL;
10563 int ret = 0;
10564
10565 if (depth > 40) {
10566 return(XML_ERR_ENTITY_LOOP);
10567 }
10568
10569
10570
10571 if (list != NULL)
10572 *list = NULL;
10573 if ((URL == NULL) && (ID == NULL))
10574 return(-1);
Daniel Veillard87b95392000-08-12 21:12:04 +000010575 if (doc == NULL) /* @@ relax but check for dereferences */
10576 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +000010577
10578
10579 ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10580 if (ctxt == NULL) return(-1);
10581 ctxt->userData = ctxt;
10582 if (sax != NULL) {
10583 oldsax = ctxt->sax;
10584 ctxt->sax = sax;
10585 if (user_data != NULL)
10586 ctxt->userData = user_data;
10587 }
10588 newDoc = xmlNewDoc(BAD_CAST "1.0");
10589 if (newDoc == NULL) {
10590 xmlFreeParserCtxt(ctxt);
10591 return(-1);
10592 }
10593 if (doc != NULL) {
10594 newDoc->intSubset = doc->intSubset;
10595 newDoc->extSubset = doc->extSubset;
10596 }
10597 if (doc->URL != NULL) {
10598 newDoc->URL = xmlStrdup(doc->URL);
10599 }
10600 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10601 if (newDoc->children == NULL) {
10602 if (sax != NULL)
10603 ctxt->sax = oldsax;
10604 xmlFreeParserCtxt(ctxt);
10605 newDoc->intSubset = NULL;
10606 newDoc->extSubset = NULL;
10607 xmlFreeDoc(newDoc);
10608 return(-1);
10609 }
10610 nodePush(ctxt, newDoc->children);
10611 if (doc == NULL) {
10612 ctxt->myDoc = newDoc;
10613 } else {
10614 ctxt->myDoc = doc;
10615 newDoc->children->doc = doc;
10616 }
10617
10618 /*
10619 * Parse a possible text declaration first
10620 */
10621 GROW;
10622 if ((RAW == '<') && (NXT(1) == '?') &&
10623 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10624 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10625 xmlParseTextDecl(ctxt);
10626 }
10627
10628 /*
10629 * Doing validity checking on chunk doesn't make sense
10630 */
10631 ctxt->instate = XML_PARSER_CONTENT;
10632 ctxt->validate = 0;
10633 ctxt->depth = depth;
10634
10635 xmlParseContent(ctxt);
10636
10637 if ((RAW == '<') && (NXT(1) == '/')) {
10638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10639 ctxt->sax->error(ctxt->userData,
10640 "chunk is not well balanced\n");
10641 ctxt->wellFormed = 0;
10642 ctxt->disableSAX = 1;
10643 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10644 } else if (RAW != 0) {
10645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10646 ctxt->sax->error(ctxt->userData,
10647 "extra content at the end of well balanced chunk\n");
10648 ctxt->wellFormed = 0;
10649 ctxt->disableSAX = 1;
10650 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10651 }
10652 if (ctxt->node != newDoc->children) {
10653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10654 ctxt->sax->error(ctxt->userData,
10655 "chunk is not well balanced\n");
10656 ctxt->wellFormed = 0;
10657 ctxt->disableSAX = 1;
10658 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10659 }
10660
10661 if (!ctxt->wellFormed) {
10662 if (ctxt->errNo == 0)
10663 ret = 1;
10664 else
10665 ret = ctxt->errNo;
10666 } else {
10667 if (list != NULL) {
10668 xmlNodePtr cur;
10669
10670 /*
10671 * Return the newly created nodeset after unlinking it from
10672 * they pseudo parent.
10673 */
10674 cur = newDoc->children->children;
10675 *list = cur;
10676 while (cur != NULL) {
10677 cur->parent = NULL;
10678 cur = cur->next;
10679 }
10680 newDoc->children->children = NULL;
10681 }
10682 ret = 0;
10683 }
10684 if (sax != NULL)
10685 ctxt->sax = oldsax;
10686 xmlFreeParserCtxt(ctxt);
10687 newDoc->intSubset = NULL;
10688 newDoc->extSubset = NULL;
10689 xmlFreeDoc(newDoc);
10690
10691 return(ret);
Daniel Veillard0142b842000-01-14 14:45:24 +000010692}
10693
10694/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010695 * xmlParseBalancedChunk:
10696 * @doc: the document the chunk pertains to
10697 * @sax: the SAX handler bloc (possibly NULL)
10698 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10699 * @depth: Used for loop detection, use 0
10700 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10701 * @list: the return value for the set of parsed nodes
10702 *
10703 * Parse a well-balanced chunk of an XML document
10704 * called by the parser
10705 * The allowed sequence for the Well Balanced Chunk is the one defined by
10706 * the content production in the XML grammar:
10707 *
10708 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10709 *
10710 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10711 * the parser error code otherwise
10712 */
10713
10714int
10715xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10716 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
10717 xmlParserCtxtPtr ctxt;
10718 xmlDocPtr newDoc;
10719 xmlSAXHandlerPtr oldsax = NULL;
10720 int size;
10721 int ret = 0;
10722
10723 if (depth > 40) {
10724 return(XML_ERR_ENTITY_LOOP);
10725 }
10726
10727
10728 if (list != NULL)
10729 *list = NULL;
10730 if (string == NULL)
10731 return(-1);
10732
10733 size = xmlStrlen(string);
10734
10735 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10736 if (ctxt == NULL) return(-1);
10737 ctxt->userData = ctxt;
10738 if (sax != NULL) {
10739 oldsax = ctxt->sax;
10740 ctxt->sax = sax;
10741 if (user_data != NULL)
10742 ctxt->userData = user_data;
10743 }
10744 newDoc = xmlNewDoc(BAD_CAST "1.0");
10745 if (newDoc == NULL) {
10746 xmlFreeParserCtxt(ctxt);
10747 return(-1);
10748 }
10749 if (doc != NULL) {
10750 newDoc->intSubset = doc->intSubset;
10751 newDoc->extSubset = doc->extSubset;
10752 }
10753 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10754 if (newDoc->children == NULL) {
10755 if (sax != NULL)
10756 ctxt->sax = oldsax;
10757 xmlFreeParserCtxt(ctxt);
10758 newDoc->intSubset = NULL;
10759 newDoc->extSubset = NULL;
10760 xmlFreeDoc(newDoc);
10761 return(-1);
10762 }
10763 nodePush(ctxt, newDoc->children);
10764 if (doc == NULL) {
10765 ctxt->myDoc = newDoc;
10766 } else {
10767 ctxt->myDoc = doc;
10768 newDoc->children->doc = doc;
10769 }
10770 ctxt->instate = XML_PARSER_CONTENT;
10771 ctxt->depth = depth;
10772
10773 /*
10774 * Doing validity checking on chunk doesn't make sense
10775 */
10776 ctxt->validate = 0;
10777
10778 xmlParseContent(ctxt);
10779
10780 if ((RAW == '<') && (NXT(1) == '/')) {
10781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10782 ctxt->sax->error(ctxt->userData,
10783 "chunk is not well balanced\n");
10784 ctxt->wellFormed = 0;
10785 ctxt->disableSAX = 1;
10786 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10787 } else if (RAW != 0) {
10788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10789 ctxt->sax->error(ctxt->userData,
10790 "extra content at the end of well balanced chunk\n");
10791 ctxt->wellFormed = 0;
10792 ctxt->disableSAX = 1;
10793 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10794 }
10795 if (ctxt->node != newDoc->children) {
10796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10797 ctxt->sax->error(ctxt->userData,
10798 "chunk is not well balanced\n");
10799 ctxt->wellFormed = 0;
10800 ctxt->disableSAX = 1;
10801 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10802 }
10803
10804 if (!ctxt->wellFormed) {
10805 if (ctxt->errNo == 0)
10806 ret = 1;
10807 else
10808 ret = ctxt->errNo;
10809 } else {
10810 if (list != NULL) {
10811 xmlNodePtr cur;
10812
10813 /*
10814 * Return the newly created nodeset after unlinking it from
10815 * they pseudo parent.
10816 */
10817 cur = newDoc->children->children;
10818 *list = cur;
10819 while (cur != NULL) {
10820 cur->parent = NULL;
10821 cur = cur->next;
10822 }
10823 newDoc->children->children = NULL;
10824 }
10825 ret = 0;
10826 }
10827 if (sax != NULL)
10828 ctxt->sax = oldsax;
10829 xmlFreeParserCtxt(ctxt);
10830 newDoc->intSubset = NULL;
10831 newDoc->extSubset = NULL;
10832 xmlFreeDoc(newDoc);
10833
10834 return(ret);
10835}
10836
10837/**
10838 * xmlParseBalancedChunkFile:
Daniel Veillard0142b842000-01-14 14:45:24 +000010839 * @doc: the document the chunk pertains to
10840 *
10841 * Parse a well-balanced chunk of an XML document contained in a file
10842 *
10843 * Returns the resulting list of nodes resulting from the parsing,
10844 * they are not added to @node
10845 */
10846
10847xmlNodePtr
10848xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
Daniel Veillardcf461992000-03-14 18:30:20 +000010849 /* TODO !!! */
10850 return(NULL);
Daniel Veillard0142b842000-01-14 14:45:24 +000010851}
10852
10853/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010854 * xmlRecoverDoc:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010855 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010856 *
10857 * parse an XML in-memory document and build a tree.
10858 * In the case the document is not Well Formed, a tree is built anyway
10859 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010860 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010861 */
10862
Daniel Veillard1e346af1999-02-22 10:33:01 +000010863xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010864xmlRecoverDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010865 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010866}
10867
10868/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010869 * xmlCreateEntityParserCtxt:
10870 * @URL: the entity URL
10871 * @ID: the entity PUBLIC ID
10872 * @base: a posible base for the target URI
10873 *
10874 * Create a parser context for an external entity
10875 * Automatic support for ZLIB/Compress compressed document is provided
10876 * by default if found at compile-time.
10877 *
10878 * Returns the new parser context or NULL
10879 */
10880xmlParserCtxtPtr
10881xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10882 const xmlChar *base) {
10883 xmlParserCtxtPtr ctxt;
10884 xmlParserInputPtr inputStream;
10885 char *directory = NULL;
Daniel Veillard87b95392000-08-12 21:12:04 +000010886 xmlChar *uri;
10887
Daniel Veillardcf461992000-03-14 18:30:20 +000010888 ctxt = xmlNewParserCtxt();
10889 if (ctxt == NULL) {
10890 return(NULL);
10891 }
10892
Daniel Veillard87b95392000-08-12 21:12:04 +000010893 uri = xmlBuildURI(URL, base);
10894
10895 if (uri == NULL) {
10896 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10897 if (inputStream == NULL) {
10898 xmlFreeParserCtxt(ctxt);
10899 return(NULL);
10900 }
10901
10902 inputPush(ctxt, inputStream);
10903
10904 if ((ctxt->directory == NULL) && (directory == NULL))
10905 directory = xmlParserGetDirectory((char *)URL);
10906 if ((ctxt->directory == NULL) && (directory != NULL))
10907 ctxt->directory = directory;
10908 } else {
10909 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10910 if (inputStream == NULL) {
10911 xmlFreeParserCtxt(ctxt);
10912 return(NULL);
10913 }
10914
10915 inputPush(ctxt, inputStream);
10916
10917 if ((ctxt->directory == NULL) && (directory == NULL))
10918 directory = xmlParserGetDirectory((char *)uri);
10919 if ((ctxt->directory == NULL) && (directory != NULL))
10920 ctxt->directory = directory;
10921 xmlFree(uri);
Daniel Veillardcf461992000-03-14 18:30:20 +000010922 }
10923
Daniel Veillardcf461992000-03-14 18:30:20 +000010924 return(ctxt);
10925}
10926
10927/**
10928 * xmlCreateFileParserCtxt:
Daniel Veillard11e00581998-10-24 18:27:49 +000010929 * @filename: the filename
10930 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010931 * Create a parser context for a file content.
10932 * Automatic support for ZLIB/Compress compressed document is provided
10933 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +000010934 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010935 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000010936 */
Daniel Veillardd692aa41999-02-28 21:54:31 +000010937xmlParserCtxtPtr
10938xmlCreateFileParserCtxt(const char *filename)
10939{
10940 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010941 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010942 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +000010943 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010944
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010945 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10946 if (buf == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010947
Daniel Veillardb05deb71999-08-10 19:04:08 +000010948 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +000010949 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010950 return(NULL);
10951 }
Daniel Veillardb05deb71999-08-10 19:04:08 +000010952
10953 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010954 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000010955 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010956 return(NULL);
10957 }
10958
Daniel Veillard6454aec1999-09-02 22:04:43 +000010959 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010960 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010961 inputStream->base = inputStream->buf->buffer->content;
10962 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010963
10964 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +000010965 if ((ctxt->directory == NULL) && (directory == NULL))
10966 directory = xmlParserGetDirectory(filename);
10967 if ((ctxt->directory == NULL) && (directory != NULL))
10968 ctxt->directory = directory;
10969
Daniel Veillardd692aa41999-02-28 21:54:31 +000010970 return(ctxt);
10971}
10972
10973/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010974 * xmlSAXParseFile:
Daniel Veillardd692aa41999-02-28 21:54:31 +000010975 * @sax: the SAX handler block
10976 * @filename: the filename
10977 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10978 * documents
10979 *
10980 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10981 * compressed document is provided by default if found at compile-time.
10982 * It use the given SAX function block to handle the parsing callback.
10983 * If sax is NULL, fallback to the default DOM tree building routines.
10984 *
10985 * Returns the resulting document tree
10986 */
10987
Daniel Veillard011b63c1999-06-02 17:44:04 +000010988xmlDocPtr
10989xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +000010990 int recovery) {
10991 xmlDocPtr ret;
10992 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +000010993 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010994
10995 ctxt = xmlCreateFileParserCtxt(filename);
10996 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000010997 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010998 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000010999 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +000011000 ctxt->sax = sax;
11001 ctxt->userData = NULL;
11002 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000011003
Daniel Veillardb05deb71999-08-10 19:04:08 +000011004 if ((ctxt->directory == NULL) && (directory == NULL))
11005 directory = xmlParserGetDirectory(filename);
11006 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +000011007 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011008
Daniel Veillard260a68f1998-08-13 03:39:55 +000011009 xmlParseDocument(ctxt);
11010
Daniel Veillard517752b1999-04-05 12:20:10 +000011011 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011012 else {
11013 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000011014 xmlFreeDoc(ctxt->myDoc);
11015 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011016 }
Daniel Veillard97fea181999-06-26 23:07:37 +000011017 if (sax != NULL)
11018 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000011019 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011020
11021 return(ret);
11022}
11023
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011024/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011025 * xmlParseFile:
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011026 * @filename: the filename
11027 *
11028 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11029 * compressed document is provided by default if found at compile-time.
11030 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011031 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011032 */
11033
Daniel Veillard011b63c1999-06-02 17:44:04 +000011034xmlDocPtr
11035xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011036 return(xmlSAXParseFile(NULL, filename, 0));
11037}
11038
11039/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011040 * xmlRecoverFile:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011041 * @filename: the filename
11042 *
11043 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11044 * compressed document is provided by default if found at compile-time.
11045 * In the case the document is not Well Formed, a tree is built anyway
11046 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011047 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011048 */
11049
Daniel Veillard011b63c1999-06-02 17:44:04 +000011050xmlDocPtr
11051xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011052 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011053}
Daniel Veillard260a68f1998-08-13 03:39:55 +000011054
Daniel Veillard11e00581998-10-24 18:27:49 +000011055/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011056 * xmlCreateMemoryParserCtxt:
11057 * @buffer: a pointer to a zero terminated char array
Daniel Veillardb566ce12000-03-04 11:39:42 +000011058 * @size: the size of the array (without the trailing 0)
Daniel Veillard11e00581998-10-24 18:27:49 +000011059 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011060 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +000011061 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011062 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000011063 */
Daniel Veillardd692aa41999-02-28 21:54:31 +000011064xmlParserCtxtPtr
11065xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011066 xmlParserCtxtPtr ctxt;
11067 xmlParserInputPtr input;
Daniel Veillard46e370e2000-07-21 20:32:03 +000011068 xmlParserInputBufferPtr buf;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011069
Daniel Veillardcf461992000-03-14 18:30:20 +000011070 if (buffer[size] != 0)
Daniel Veillardb566ce12000-03-04 11:39:42 +000011071 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011072
Daniel Veillardb05deb71999-08-10 19:04:08 +000011073 ctxt = xmlNewParserCtxt();
Daniel Veillardcf461992000-03-14 18:30:20 +000011074 if (ctxt == NULL)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011075 return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011076
Daniel Veillard46e370e2000-07-21 20:32:03 +000011077 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
11078 if (buf == NULL) return(NULL);
11079
Daniel Veillardb05deb71999-08-10 19:04:08 +000011080 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011081 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000011082 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011083 return(NULL);
11084 }
11085
11086 input->filename = NULL;
Daniel Veillard46e370e2000-07-21 20:32:03 +000011087 input->buf = buf;
11088 input->base = input->buf->buffer->content;
11089 input->cur = input->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011090
11091 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +000011092 return(ctxt);
11093}
11094
11095/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011096 * xmlSAXParseMemory:
Daniel Veillardd692aa41999-02-28 21:54:31 +000011097 * @sax: the SAX handler block
11098 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +000011099 * @size: the size of the array
11100 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
Daniel Veillardd692aa41999-02-28 21:54:31 +000011101 * documents
11102 *
11103 * parse an XML in-memory block and use the given SAX function block
11104 * to handle the parsing callback. If sax is NULL, fallback to the default
11105 * DOM tree building routines.
11106 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011107 * Returns the resulting document tree
11108 */
11109xmlDocPtr
11110xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
11111 xmlDocPtr ret;
11112 xmlParserCtxtPtr ctxt;
11113
11114 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11115 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000011116 if (sax != NULL) {
11117 ctxt->sax = sax;
11118 ctxt->userData = NULL;
11119 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000011120
11121 xmlParseDocument(ctxt);
11122
Daniel Veillard517752b1999-04-05 12:20:10 +000011123 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011124 else {
11125 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000011126 xmlFreeDoc(ctxt->myDoc);
11127 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011128 }
Daniel Veillard97fea181999-06-26 23:07:37 +000011129 if (sax != NULL)
11130 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000011131 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011132
11133 return(ret);
11134}
11135
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011136/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011137 * xmlParseMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +000011138 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011139 * @size: the size of the array
11140 *
11141 * parse an XML in-memory block and build a tree.
11142 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011143 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011144 */
11145
11146xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011147 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11148}
11149
11150/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011151 * xmlRecoverMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +000011152 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011153 * @size: the size of the array
11154 *
11155 * parse an XML in-memory block and build a tree.
11156 * In the case the document is not Well Formed, a tree is built anyway
11157 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011158 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011159 */
11160
11161xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
11162 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011163}
Daniel Veillard260a68f1998-08-13 03:39:55 +000011164
Daniel Veillard260a68f1998-08-13 03:39:55 +000011165
Daniel Veillard11e00581998-10-24 18:27:49 +000011166/**
11167 * xmlSetupParserForBuffer:
11168 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011169 * @buffer: a xmlChar * buffer
Daniel Veillard11e00581998-10-24 18:27:49 +000011170 * @filename: a file name
11171 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000011172 * Setup the parser context to parse a new buffer; Clears any prior
11173 * contents from the parser context. The buffer parameter must not be
11174 * NULL, but the filename parameter can be
11175 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011176void
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011177xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +000011178 const char* filename)
11179{
Daniel Veillardb05deb71999-08-10 19:04:08 +000011180 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011181
Daniel Veillardb05deb71999-08-10 19:04:08 +000011182 input = xmlNewInputStream(ctxt);
11183 if (input == NULL) {
11184 perror("malloc");
Daniel Veillard6454aec1999-09-02 22:04:43 +000011185 xmlFree(ctxt);
Daniel Veillard0142b842000-01-14 14:45:24 +000011186 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +000011187 }
11188
11189 xmlClearParserCtxt(ctxt);
11190 if (filename != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000011191 input->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011192 input->base = buffer;
11193 input->cur = buffer;
11194 inputPush(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011195}
11196
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011197/**
11198 * xmlSAXUserParseFile:
11199 * @sax: a SAX handler
11200 * @user_data: The user data returned on SAX callbacks
11201 * @filename: a file name
11202 *
11203 * parse an XML file and call the given SAX handler routines.
11204 * Automatic support for ZLIB/Compress compressed document is provided
11205 *
11206 * Returns 0 in case of success or a error number otherwise
11207 */
Daniel Veillard11a48ec1999-11-23 10:40:46 +000011208int
11209xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11210 const char *filename) {
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011211 int ret = 0;
11212 xmlParserCtxtPtr ctxt;
11213
11214 ctxt = xmlCreateFileParserCtxt(filename);
11215 if (ctxt == NULL) return -1;
Daniel Veillard294cbca1999-12-03 13:19:09 +000011216 if (ctxt->sax != &xmlDefaultSAXHandler)
11217 xmlFree(ctxt->sax);
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011218 ctxt->sax = sax;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000011219 if (user_data != NULL)
11220 ctxt->userData = user_data;
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011221
11222 xmlParseDocument(ctxt);
11223
11224 if (ctxt->wellFormed)
11225 ret = 0;
11226 else {
11227 if (ctxt->errNo != 0)
11228 ret = ctxt->errNo;
11229 else
11230 ret = -1;
11231 }
11232 if (sax != NULL)
11233 ctxt->sax = NULL;
11234 xmlFreeParserCtxt(ctxt);
11235
11236 return ret;
11237}
11238
11239/**
11240 * xmlSAXUserParseMemory:
11241 * @sax: a SAX handler
11242 * @user_data: The user data returned on SAX callbacks
11243 * @buffer: an in-memory XML document input
Daniel Veillard51e3b151999-11-12 17:02:31 +000011244 * @size: the length of the XML document in bytes
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011245 *
11246 * A better SAX parsing routine.
11247 * parse an XML in-memory buffer and call the given SAX handler routines.
11248 *
11249 * Returns 0 in case of success or a error number otherwise
11250 */
11251int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
11252 char *buffer, int size) {
11253 int ret = 0;
11254 xmlParserCtxtPtr ctxt;
Daniel Veillard87b95392000-08-12 21:12:04 +000011255 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011256
11257 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11258 if (ctxt == NULL) return -1;
Daniel Veillard87b95392000-08-12 21:12:04 +000011259 if (sax != NULL) {
11260 oldsax = ctxt->sax;
11261 ctxt->sax = sax;
11262 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011263 ctxt->userData = user_data;
11264
11265 xmlParseDocument(ctxt);
11266
11267 if (ctxt->wellFormed)
11268 ret = 0;
11269 else {
11270 if (ctxt->errNo != 0)
11271 ret = ctxt->errNo;
11272 else
11273 ret = -1;
11274 }
Daniel Veillard87b95392000-08-12 21:12:04 +000011275 if (sax != NULL) {
11276 ctxt->sax = oldsax;
11277 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011278 xmlFreeParserCtxt(ctxt);
11279
11280 return ret;
11281}
11282
Daniel Veillard260a68f1998-08-13 03:39:55 +000011283
Daniel Veillardb05deb71999-08-10 19:04:08 +000011284/************************************************************************
11285 * *
Daniel Veillard51e3b151999-11-12 17:02:31 +000011286 * Miscellaneous *
Daniel Veillardb05deb71999-08-10 19:04:08 +000011287 * *
11288 ************************************************************************/
11289
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing,
 * by running the cleanup routines of the character encoding handlers
 * and of the predefined entities, in that order.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* global tables owned by the encoding layer */
    xmlCleanupCharEncodingHandlers();

    /* global tables owned by the entities layer */
    xmlCleanupPredefinedEntities();
}
Daniel Veillardb05deb71999-08-10 19:04:08 +000011304
Daniel Veillard11e00581998-10-24 18:27:49 +000011305/**
11306 * xmlParserFindNodeInfo:
11307 * @ctxt: an XML parser context
11308 * @node: an XML node within the tree
11309 *
11310 * Find the parser node info struct for a given node
11311 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011312 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000011313 */
11314const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
11315 const xmlNode* node)
11316{
11317 unsigned long pos;
11318
11319 /* Find position where node should be at */
11320 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
11321 if ( ctx->node_seq.buffer[pos].node == node )
11322 return &ctx->node_seq.buffer[pos];
11323 else
11324 return NULL;
11325}
11326
11327
Daniel Veillard11e00581998-10-24 18:27:49 +000011328/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011329 * xmlInitNodeInfoSeq:
Daniel Veillard11e00581998-10-24 18:27:49 +000011330 * @seq: a node info sequence pointer
11331 *
11332 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +000011333 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011334void
11335xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011336{
11337 seq->length = 0;
11338 seq->maximum = 0;
11339 seq->buffer = NULL;
11340}
11341
Daniel Veillard11e00581998-10-24 18:27:49 +000011342/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011343 * xmlClearNodeInfoSeq:
Daniel Veillard11e00581998-10-24 18:27:49 +000011344 * @seq: a node info sequence pointer
11345 *
11346 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +000011347 * info sequence
11348 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011349void
11350xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011351{
11352 if ( seq->buffer != NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +000011353 xmlFree(seq->buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011354 xmlInitNodeInfoSeq(seq);
11355}
11356
11357
Daniel Veillard11e00581998-10-24 18:27:49 +000011358/**
11359 * xmlParserFindNodeInfoIndex:
11360 * @seq: a node info sequence pointer
11361 * @node: an XML node pointer
11362 *
11363 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000011364 * xmlParserFindNodeInfoIndex : Find the index that the info record for
11365 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +000011366 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011367 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +000011368 */
11369unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
11370 const xmlNode* node)
11371{
11372 unsigned long upper, lower, middle;
11373 int found = 0;
11374
11375 /* Do a binary search for the key */
11376 lower = 1;
11377 upper = seq->length;
11378 middle = 0;
11379 while ( lower <= upper && !found) {
11380 middle = lower + (upper - lower) / 2;
11381 if ( node == seq->buffer[middle - 1].node )
11382 found = 1;
11383 else if ( node < seq->buffer[middle - 1].node )
11384 upper = middle - 1;
11385 else
11386 lower = middle + 1;
11387 }
11388
11389 /* Return position */
11390 if ( middle == 0 || seq->buffer[middle - 1].node < node )
11391 return middle;
11392 else
11393 return middle - 1;
11394}
11395
11396
Daniel Veillard11e00581998-10-24 18:27:49 +000011397/**
11398 * xmlParserAddNodeInfo:
11399 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +000011400 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +000011401 *
11402 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +000011403 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011404void
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011405xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +000011406 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011407{
11408 unsigned long pos;
11409 static unsigned int block_size = 5;
11410
11411 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011412 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
11413 if ( pos < ctxt->node_seq.length
11414 && ctxt->node_seq.buffer[pos].node == info->node ) {
11415 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011416 }
11417
11418 /* Otherwise, we need to add new node to buffer */
11419 else {
11420 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011421 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011422 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011423 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
11424 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +000011425
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011426 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +000011427 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011428 else
Daniel Veillard6454aec1999-09-02 22:04:43 +000011429 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011430
11431 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +000011433 ctxt->sax->error(ctxt->userData, "Out of memory\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011434 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011435 return;
11436 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011437 ctxt->node_seq.buffer = tmp_buffer;
11438 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011439 }
11440
11441 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011442 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011443 unsigned long i;
11444
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011445 for ( i = ctxt->node_seq.length; i > pos; i-- )
11446 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +000011447 }
11448
11449 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011450 ctxt->node_seq.buffer[pos] = *info;
11451 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011452 }
11453}
Daniel Veillard011b63c1999-06-02 17:44:04 +000011454
11455
Daniel Veillardb05deb71999-08-10 19:04:08 +000011456/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011457 * xmlSubstituteEntitiesDefault:
Daniel Veillardb05deb71999-08-10 19:04:08 +000011458 * @val: int 0 or 1
11459 *
11460 * Set and return the previous value for default entity support.
11461 * Initially the parser always keep entity references instead of substituting
11462 * entity values in the output. This function has to be used to change the
11463 * default parser behaviour
11464 * SAX::subtituteEntities() has to be used for changing that on a file by
11465 * file basis.
11466 *
11467 * Returns the last value for 0 for no substitution, 1 for substitution.
11468 */
11469
11470int
11471xmlSubstituteEntitiesDefault(int val) {
11472 int old = xmlSubstituteEntitiesDefaultValue;
11473
11474 xmlSubstituteEntitiesDefaultValue = val;
11475 return(old);
11476}
11477
Daniel Veillardfb76c402000-03-04 11:39:42 +000011478/**
11479 * xmlKeepBlanksDefault:
11480 * @val: int 0 or 1
11481 *
11482 * Set and return the previous value for default blanks text nodes support.
11483 * The 1.x version of the parser used an heuristic to try to detect
11484 * ignorable white spaces. As a result the SAX callback was generating
11485 * ignorableWhitespace() callbacks instead of characters() one, and when
11486 * using the DOM output text nodes containing those blanks were not generated.
11487 * The 2.x and later version will switch to the XML standard way and
11488 * ignorableWhitespace() are only generated when running the parser in
11489 * validating mode and when the current element doesn't allow CDATA or
11490 * mixed content.
11491 * This function is provided as a way to force the standard behaviour
11492 * on 1.X libs and to switch back to the old mode for compatibility when
11493 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
11494 * by using xmlIsBlankNode() commodity function to detect the "empty"
11495 * nodes generated.
11496 * This value also affect autogeneration of indentation when saving code
11497 * if blanks sections are kept, indentation is not generated.
11498 *
11499 * Returns the last value for 0 for no substitution, 1 for substitution.
11500 */
11501
11502int
11503xmlKeepBlanksDefault(int val) {
11504 int old = xmlKeepBlanksDefaultValue;
11505
11506 xmlKeepBlanksDefaultValue = val;
11507 xmlIndentTreeOutput = !val;
11508 return(old);
11509}
11510