/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
49
Daniel Veillard56a4cb82001-03-24 17:00:36 +000050void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000051
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000070 xmlInitMemory();
71
Owen Taylor3473f882001-02-23 17:55:21 +000072 if ((myversion / 10000) != (version / 10000)) {
73 xmlGenericError(xmlGenericErrorContext,
74 "Fatal: program compiled against libxml %d using libxml %d\n",
75 (version / 10000), (myversion / 10000));
76 exit(1);
77 }
78 if ((myversion / 100) < (version / 100)) {
79 xmlGenericError(xmlGenericErrorContext,
80 "Warning: program compiled against libxml %d using older %d\n",
81 (version / 100), (myversion / 100));
82 }
83}
84
85
/*
 * Names of the features exposed through the feature API, in the order
 * in which xmlGetFeaturesList() reports them.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names actually stored
 * @result:  array receiving pointers to the feature names
 *
 * Copy at most *@len feature names into @result.  The stored pointers
 * refer to static strings and must not be freed or modified.
 *
 * When @len or @result is NULL nothing is copied and the total number
 * of features is returned, which lets a caller size its array.
 *
 * Returns the total number of known features, or -1 if *@len is
 * negative or not below 1000.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx = 0;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* Clamp the request to what is available and report it back. */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    while (idx < wanted) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
157
158/*
159 * xmlGetFeature:
160 * @ctxt: an XML/HTML parser context
161 * @name: the feature name
162 * @result: location to store the result
163 *
164 * Read the current value of one feature of this parser instance
165 *
166 * Returns -1 in case or error, 0 otherwise
167 */
168int
169xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
170 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
171 return(-1);
172
173 if (!strcmp(name, "validate")) {
174 *((int *) result) = ctxt->validate;
175 } else if (!strcmp(name, "keep blanks")) {
176 *((int *) result) = ctxt->keepBlanks;
177 } else if (!strcmp(name, "disable SAX")) {
178 *((int *) result) = ctxt->disableSAX;
179 } else if (!strcmp(name, "fetch external entities")) {
180 *((int *) result) = ctxt->loadsubset;
181 } else if (!strcmp(name, "substitute entities")) {
182 *((int *) result) = ctxt->replaceEntities;
183 } else if (!strcmp(name, "gather line info")) {
184 *((int *) result) = ctxt->record_info;
185 } else if (!strcmp(name, "user data")) {
186 *((void **)result) = ctxt->userData;
187 } else if (!strcmp(name, "is html")) {
188 *((int *) result) = ctxt->html;
189 } else if (!strcmp(name, "is standalone")) {
190 *((int *) result) = ctxt->standalone;
191 } else if (!strcmp(name, "document")) {
192 *((xmlDocPtr *) result) = ctxt->myDoc;
193 } else if (!strcmp(name, "is well formed")) {
194 *((int *) result) = ctxt->wellFormed;
195 } else if (!strcmp(name, "is valid")) {
196 *((int *) result) = ctxt->valid;
197 } else if (!strcmp(name, "SAX block")) {
198 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
199 } else if (!strcmp(name, "SAX function internalSubset")) {
200 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
201 } else if (!strcmp(name, "SAX function isStandalone")) {
202 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
203 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
204 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
205 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
206 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
207 } else if (!strcmp(name, "SAX function resolveEntity")) {
208 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
209 } else if (!strcmp(name, "SAX function getEntity")) {
210 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
211 } else if (!strcmp(name, "SAX function entityDecl")) {
212 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
213 } else if (!strcmp(name, "SAX function notationDecl")) {
214 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
215 } else if (!strcmp(name, "SAX function attributeDecl")) {
216 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
217 } else if (!strcmp(name, "SAX function elementDecl")) {
218 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
219 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
220 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
221 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
222 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
223 } else if (!strcmp(name, "SAX function startDocument")) {
224 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
225 } else if (!strcmp(name, "SAX function endDocument")) {
226 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
227 } else if (!strcmp(name, "SAX function startElement")) {
228 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
229 } else if (!strcmp(name, "SAX function endElement")) {
230 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
231 } else if (!strcmp(name, "SAX function reference")) {
232 *((referenceSAXFunc *) result) = ctxt->sax->reference;
233 } else if (!strcmp(name, "SAX function characters")) {
234 *((charactersSAXFunc *) result) = ctxt->sax->characters;
235 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
236 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
237 } else if (!strcmp(name, "SAX function processingInstruction")) {
238 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
239 } else if (!strcmp(name, "SAX function comment")) {
240 *((commentSAXFunc *) result) = ctxt->sax->comment;
241 } else if (!strcmp(name, "SAX function warning")) {
242 *((warningSAXFunc *) result) = ctxt->sax->warning;
243 } else if (!strcmp(name, "SAX function error")) {
244 *((errorSAXFunc *) result) = ctxt->sax->error;
245 } else if (!strcmp(name, "SAX function fatalError")) {
246 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
247 } else if (!strcmp(name, "SAX function getParameterEntity")) {
248 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
249 } else if (!strcmp(name, "SAX function cdataBlock")) {
250 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
251 } else if (!strcmp(name, "SAX function externalSubset")) {
252 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
253 } else {
254 return(-1);
255 }
256 return(0);
257}
258
259/*
260 * xmlSetFeature:
261 * @ctxt: an XML/HTML parser context
262 * @name: the feature name
263 * @value: pointer to the location of the new value
264 *
265 * Change the current value of one feature of this parser instance
266 *
267 * Returns -1 in case or error, 0 otherwise
268 */
269int
270xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
271 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
272 return(-1);
273
274 if (!strcmp(name, "validate")) {
275 int newvalidate = *((int *) value);
276 if ((!ctxt->validate) && (newvalidate != 0)) {
277 if (ctxt->vctxt.warning == NULL)
278 ctxt->vctxt.warning = xmlParserValidityWarning;
279 if (ctxt->vctxt.error == NULL)
280 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000281 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000282 }
283 ctxt->validate = newvalidate;
284 } else if (!strcmp(name, "keep blanks")) {
285 ctxt->keepBlanks = *((int *) value);
286 } else if (!strcmp(name, "disable SAX")) {
287 ctxt->disableSAX = *((int *) value);
288 } else if (!strcmp(name, "fetch external entities")) {
289 ctxt->loadsubset = *((int *) value);
290 } else if (!strcmp(name, "substitute entities")) {
291 ctxt->replaceEntities = *((int *) value);
292 } else if (!strcmp(name, "gather line info")) {
293 ctxt->record_info = *((int *) value);
294 } else if (!strcmp(name, "user data")) {
295 ctxt->userData = *((void **)value);
296 } else if (!strcmp(name, "is html")) {
297 ctxt->html = *((int *) value);
298 } else if (!strcmp(name, "is standalone")) {
299 ctxt->standalone = *((int *) value);
300 } else if (!strcmp(name, "document")) {
301 ctxt->myDoc = *((xmlDocPtr *) value);
302 } else if (!strcmp(name, "is well formed")) {
303 ctxt->wellFormed = *((int *) value);
304 } else if (!strcmp(name, "is valid")) {
305 ctxt->valid = *((int *) value);
306 } else if (!strcmp(name, "SAX block")) {
307 ctxt->sax = *((xmlSAXHandlerPtr *) value);
308 } else if (!strcmp(name, "SAX function internalSubset")) {
309 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
310 } else if (!strcmp(name, "SAX function isStandalone")) {
311 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
312 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
313 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
314 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
315 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function resolveEntity")) {
317 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
318 } else if (!strcmp(name, "SAX function getEntity")) {
319 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
320 } else if (!strcmp(name, "SAX function entityDecl")) {
321 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function notationDecl")) {
323 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function attributeDecl")) {
325 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function elementDecl")) {
327 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
329 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
331 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function startDocument")) {
333 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function endDocument")) {
335 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function startElement")) {
337 ctxt->sax->startElement = *((startElementSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function endElement")) {
339 ctxt->sax->endElement = *((endElementSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function reference")) {
341 ctxt->sax->reference = *((referenceSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function characters")) {
343 ctxt->sax->characters = *((charactersSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
345 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function processingInstruction")) {
347 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function comment")) {
349 ctxt->sax->comment = *((commentSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function warning")) {
351 ctxt->sax->warning = *((warningSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function error")) {
353 ctxt->sax->error = *((errorSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function fatalError")) {
355 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function getParameterEntity")) {
357 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
358 } else if (!strcmp(name, "SAX function cdataBlock")) {
359 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function externalSubset")) {
361 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
362 } else {
363 return(-1);
364 }
365 return(0);
366}
367
368/************************************************************************
369 * *
370 * Some functions to avoid too large macros *
371 * *
372 ************************************************************************/
373
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *            | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE
 * and FFFF.  Also available as a macro IS_CHAR().
 *
 * Returns 1 if @c is an XML Char, 0 otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space only TAB, LF and CR are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* Surrogate block. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through and are rejected here. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
394
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK().
 *
 * Returns 1 if @c is an XML white space character, 0 otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* horizontal tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
409
/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray lookup
 * table; larger ones are tested against the explicit ranges of the
 * production, split into three ascending tiers so that a value is only
 * compared with the ranges that can contain it.  Negative values are
 * not code points and are always rejected (they must never be used as
 * an index into the table).
 *
 * The range tests were generated from the REC text with:
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 1 if @c is a BaseChar, 0 otherwise
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

int
xmlIsBaseChar(int c) {
    /* A negative value would index before the start of the table. */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* Tier 1: every range here ends below 0x0905. */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* Tier 2: ranges between 0x0905 and 0x0F69. */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* Tier 3: everything from 0x10A0 upwards. */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
648
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 1 if @c is a Digit, 0 otherwise
 */
int
xmlIsDigit(int c) {
    /* Inclusive [first, last] ranges, sorted by ascending code point. */
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 },
    };
    const int nbRanges =
        (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int i;

    for (i = 0; i < nbRanges; i++) {
        /* Sorted table: once past @c no later range can match. */
        if (c < digitRanges[i][0])
            return(0);
        if (c <= digitRanges[i][1])
            return(1);
    }
    return(0);
}
678
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges are tested in four ascending tiers so that a value is
 * only compared with the ranges that can contain it.
 *
 * Returns 1 if @c is a CombiningChar, 0 otherwise
 */
int
xmlIsCombining(int c) {
    /* Nothing below 0x0300 is a combining character. */
    if (c < 0x0300)
        return(0);

    /* Tier 1: 0x0300 .. 0x06ED */
    if (c < 0x0901)
        return(
            ((c >= 0x0300) && (c <= 0x0345)) ||
            ((c >= 0x0360) && (c <= 0x0361)) ||
            ((c >= 0x0483) && (c <= 0x0486)) ||
            ((c >= 0x0591) && (c <= 0x05A1)) ||
            ((c >= 0x05A3) && (c <= 0x05B9)) ||
            ((c >= 0x05BB) && (c <= 0x05BD)) ||
            (c == 0x05BF) ||
            ((c >= 0x05C1) && (c <= 0x05C2)) ||
            (c == 0x05C4) ||
            ((c >= 0x064B) && (c <= 0x0652)) ||
            (c == 0x0670) ||
            ((c >= 0x06D6) && (c <= 0x06DC)) ||
            ((c >= 0x06DD) && (c <= 0x06DF)) ||
            ((c >= 0x06E0) && (c <= 0x06E4)) ||
            ((c >= 0x06E7) && (c <= 0x06E8)) ||
            ((c >= 0x06EA) && (c <= 0x06ED)));

    /* Tier 2: 0x0901 .. 0x09E3 */
    if (c < 0x0A02)
        return(
            ((c >= 0x0901) && (c <= 0x0903)) ||
            (c == 0x093C) ||
            ((c >= 0x093E) && (c <= 0x094C)) ||
            (c == 0x094D) ||
            ((c >= 0x0951) && (c <= 0x0954)) ||
            ((c >= 0x0962) && (c <= 0x0963)) ||
            ((c >= 0x0981) && (c <= 0x0983)) ||
            (c == 0x09BC) ||
            (c == 0x09BE) ||
            (c == 0x09BF) ||
            ((c >= 0x09C0) && (c <= 0x09C4)) ||
            ((c >= 0x09C7) && (c <= 0x09C8)) ||
            ((c >= 0x09CB) && (c <= 0x09CD)) ||
            (c == 0x09D7) ||
            ((c >= 0x09E2) && (c <= 0x09E3)));

    /* Tier 3: 0x0A02 .. 0x0D57 */
    if (c < 0x0E31)
        return(
            (c == 0x0A02) ||
            (c == 0x0A3C) ||
            (c == 0x0A3E) ||
            (c == 0x0A3F) ||
            ((c >= 0x0A40) && (c <= 0x0A42)) ||
            ((c >= 0x0A47) && (c <= 0x0A48)) ||
            ((c >= 0x0A4B) && (c <= 0x0A4D)) ||
            ((c >= 0x0A70) && (c <= 0x0A71)) ||
            ((c >= 0x0A81) && (c <= 0x0A83)) ||
            (c == 0x0ABC) ||
            ((c >= 0x0ABE) && (c <= 0x0AC5)) ||
            ((c >= 0x0AC7) && (c <= 0x0AC9)) ||
            ((c >= 0x0ACB) && (c <= 0x0ACD)) ||
            ((c >= 0x0B01) && (c <= 0x0B03)) ||
            (c == 0x0B3C) ||
            ((c >= 0x0B3E) && (c <= 0x0B43)) ||
            ((c >= 0x0B47) && (c <= 0x0B48)) ||
            ((c >= 0x0B4B) && (c <= 0x0B4D)) ||
            ((c >= 0x0B56) && (c <= 0x0B57)) ||
            ((c >= 0x0B82) && (c <= 0x0B83)) ||
            ((c >= 0x0BBE) && (c <= 0x0BC2)) ||
            ((c >= 0x0BC6) && (c <= 0x0BC8)) ||
            ((c >= 0x0BCA) && (c <= 0x0BCD)) ||
            (c == 0x0BD7) ||
            ((c >= 0x0C01) && (c <= 0x0C03)) ||
            ((c >= 0x0C3E) && (c <= 0x0C44)) ||
            ((c >= 0x0C46) && (c <= 0x0C48)) ||
            ((c >= 0x0C4A) && (c <= 0x0C4D)) ||
            ((c >= 0x0C55) && (c <= 0x0C56)) ||
            ((c >= 0x0C82) && (c <= 0x0C83)) ||
            ((c >= 0x0CBE) && (c <= 0x0CC4)) ||
            ((c >= 0x0CC6) && (c <= 0x0CC8)) ||
            ((c >= 0x0CCA) && (c <= 0x0CCD)) ||
            ((c >= 0x0CD5) && (c <= 0x0CD6)) ||
            ((c >= 0x0D02) && (c <= 0x0D03)) ||
            ((c >= 0x0D3E) && (c <= 0x0D43)) ||
            ((c >= 0x0D46) && (c <= 0x0D48)) ||
            ((c >= 0x0D4A) && (c <= 0x0D4D)) ||
            (c == 0x0D57));

    /* Tier 4: 0x0E31 and above */
    return(
        (c == 0x0E31) ||
        ((c >= 0x0E34) && (c <= 0x0E3A)) ||
        ((c >= 0x0E47) && (c <= 0x0E4E)) ||
        (c == 0x0EB1) ||
        ((c >= 0x0EB4) && (c <= 0x0EB9)) ||
        ((c >= 0x0EBB) && (c <= 0x0EBC)) ||
        ((c >= 0x0EC8) && (c <= 0x0ECD)) ||
        ((c >= 0x0F18) && (c <= 0x0F19)) ||
        (c == 0x0F35) ||
        (c == 0x0F37) ||
        (c == 0x0F39) ||
        (c == 0x0F3E) ||
        (c == 0x0F3F) ||
        ((c >= 0x0F71) && (c <= 0x0F84)) ||
        ((c >= 0x0F86) && (c <= 0x0F8B)) ||
        ((c >= 0x0F90) && (c <= 0x0F95)) ||
        (c == 0x0F97) ||
        ((c >= 0x0F99) && (c <= 0x0FAD)) ||
        ((c >= 0x0FB1) && (c <= 0x0FB7)) ||
        (c == 0x0FB9) ||
        ((c >= 0x20D0) && (c <= 0x20DC)) ||
        (c == 0x20E1) ||
        ((c >= 0x302A) && (c <= 0x302F)) ||
        (c == 0x3099) ||
        (c == 0x309A));
}
791
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE]: all three members, including 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
816
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point matches the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three members of the production are accepted; the CJK
 * compatibility block (#xF900-#xFA2D) is not part of it.
 *
 * Returns 1 if @c is Ideographic, 0 otherwise
 */
int
xmlIsIdeographic(int c) {
    /* Quick reject: no ideograph lives below 0x0100. */
    if (c < 0x0100)
        return(0);

    return(
        ((c >= 0x4E00) && (c <= 0x9FA5)) ||
        (c == 0x3007) ||
        ((c >= 0x3021) && (c <= 0x3029)));
}
834
/**
 * xmlIsLetter:
 * @c:  a unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideograph */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
848
/**
 * xmlIsPubidChar:
 * @c:  a unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* white space allowed in a public identifier */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
871
872/************************************************************************
873 * *
874 * Input handling functions for progressive parsing *
875 * *
876 ************************************************************************/
877
878/* #define DEBUG_INPUT */
879/* #define DEBUG_STACK */
880/* #define DEBUG_PUSH */
881
882
883/* we need to keep enough input to show errors in context */
884#define LINE_LEN 80
885
886#ifdef DEBUG_INPUT
887#define CHECK_BUFFER(in) check_buffer(in)
888
889void check_buffer(xmlParserInputPtr in) {
890 if (in->base != in->buf->buffer->content) {
891 xmlGenericError(xmlGenericErrorContext,
892 "xmlParserInput: base mismatch problem\n");
893 }
894 if (in->cur < in->base) {
895 xmlGenericError(xmlGenericErrorContext,
896 "xmlParserInput: cur < base problem\n");
897 }
898 if (in->cur > in->base + in->buf->buffer->use) {
899 xmlGenericError(xmlGenericErrorContext,
900 "xmlParserInput: cur > base + use problem\n");
901 }
902 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
903 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
904 in->buf->buffer->use, in->buf->buffer->size);
905}
906
907#else
908#define CHECK_BUFFER(in)
909#endif
910
911
912/**
913 * xmlParserInputRead:
914 * @in: an XML parser input
915 * @len: an indicative size for the lookahead
916 *
917 * This function refresh the input for the parser. It doesn't try to
918 * preserve pointers to the input buffer, and discard already read data
919 *
920 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
921 * end of this entity
922 */
923int
924xmlParserInputRead(xmlParserInputPtr in, int len) {
925 int ret;
926 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000927 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000928
929#ifdef DEBUG_INPUT
930 xmlGenericError(xmlGenericErrorContext, "Read\n");
931#endif
932 if (in->buf == NULL) return(-1);
933 if (in->base == NULL) return(-1);
934 if (in->cur == NULL) return(-1);
935 if (in->buf->buffer == NULL) return(-1);
936 if (in->buf->readcallback == NULL) return(-1);
937
938 CHECK_BUFFER(in);
939
940 used = in->cur - in->buf->buffer->content;
941 ret = xmlBufferShrink(in->buf->buffer, used);
942 if (ret > 0) {
943 in->cur -= ret;
944 in->consumed += ret;
945 }
946 ret = xmlParserInputBufferRead(in->buf, len);
947 if (in->base != in->buf->buffer->content) {
948 /*
949 * the buffer has been realloced
950 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000951 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000952 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000953 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000954 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000955 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000956
957 CHECK_BUFFER(in);
958
959 return(ret);
960}
961
962/**
963 * xmlParserInputGrow:
964 * @in: an XML parser input
965 * @len: an indicative size for the lookahead
966 *
967 * This function increase the input for the parser. It tries to
968 * preserve pointers to the input buffer, and keep already read data
969 *
970 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
971 * end of this entity
972 */
973int
974xmlParserInputGrow(xmlParserInputPtr in, int len) {
975 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000976 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000977
978#ifdef DEBUG_INPUT
979 xmlGenericError(xmlGenericErrorContext, "Grow\n");
980#endif
981 if (in->buf == NULL) return(-1);
982 if (in->base == NULL) return(-1);
983 if (in->cur == NULL) return(-1);
984 if (in->buf->buffer == NULL) return(-1);
985
986 CHECK_BUFFER(in);
987
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000988 indx = in->cur - in->base;
989 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000990
991 CHECK_BUFFER(in);
992
993 return(0);
994 }
995 if (in->buf->readcallback != NULL)
996 ret = xmlParserInputBufferGrow(in->buf, len);
997 else
998 return(0);
999
1000 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001001 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001002 * block, but we use it really as an integer to do some
1003 * pointer arithmetic. Insure will raise it as a bug but in
1004 * that specific case, that's not !
1005 */
1006 if (in->base != in->buf->buffer->content) {
1007 /*
1008 * the buffer has been realloced
1009 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001011 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001012 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001014 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001015
1016 CHECK_BUFFER(in);
1017
1018 return(ret);
1019}
1020
1021/**
1022 * xmlParserInputShrink:
1023 * @in: an XML parser input
1024 *
1025 * This function removes used input for the parser.
1026 */
1027void
1028xmlParserInputShrink(xmlParserInputPtr in) {
1029 int used;
1030 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001031 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001032
1033#ifdef DEBUG_INPUT
1034 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1035#endif
1036 if (in->buf == NULL) return;
1037 if (in->base == NULL) return;
1038 if (in->cur == NULL) return;
1039 if (in->buf->buffer == NULL) return;
1040
1041 CHECK_BUFFER(in);
1042
1043 used = in->cur - in->buf->buffer->content;
1044 /*
1045 * Do not shrink on large buffers whose only a tiny fraction
1046 * was consumned
1047 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001048 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001049 return;
1050 if (used > INPUT_CHUNK) {
1051 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1052 if (ret > 0) {
1053 in->cur -= ret;
1054 in->consumed += ret;
1055 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001056 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001057 }
1058
1059 CHECK_BUFFER(in);
1060
1061 if (in->buf->buffer->use > INPUT_CHUNK) {
1062 return;
1063 }
1064 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1065 if (in->base != in->buf->buffer->content) {
1066 /*
1067 * the buffer has been realloced
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001071 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001072 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001073 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001074
1075 CHECK_BUFFER(in);
1076}
1077
1078/************************************************************************
1079 * *
1080 * UTF8 character input and related functions *
1081 * *
1082 ************************************************************************/
1083
1084/**
1085 * xmlNextChar:
1086 * @ctxt: the XML parser context
1087 *
1088 * Skip to the next char input char.
1089 */
1090
1091void
1092xmlNextChar(xmlParserCtxtPtr ctxt) {
1093 if (ctxt->instate == XML_PARSER_EOF)
1094 return;
1095
1096 /*
1097 * 2.11 End-of-Line Handling
1098 * the literal two-character sequence "#xD#xA" or a standalone
1099 * literal #xD, an XML processor must pass to the application
1100 * the single character #xA.
1101 */
1102 if (ctxt->token != 0) ctxt->token = 0;
1103 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1104 if ((*ctxt->input->cur == 0) &&
1105 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1106 (ctxt->instate != XML_PARSER_COMMENT)) {
1107 /*
1108 * If we are at the end of the current entity and
1109 * the context allows it, we pop consumed entities
1110 * automatically.
1111 * the auto closing should be blocked in other cases
1112 */
1113 xmlPopInput(ctxt);
1114 } else {
1115 if (*(ctxt->input->cur) == '\n') {
1116 ctxt->input->line++; ctxt->input->col = 1;
1117 } else ctxt->input->col++;
1118 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1119 /*
1120 * We are supposed to handle UTF8, check it's valid
1121 * From rfc2044: encoding of the Unicode values on UTF-8:
1122 *
1123 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1124 * 0000 0000-0000 007F 0xxxxxxx
1125 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1126 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1127 *
1128 * Check for the 0x110000 limit too
1129 */
1130 const unsigned char *cur = ctxt->input->cur;
1131 unsigned char c;
1132
1133 c = *cur;
1134 if (c & 0x80) {
1135 if (cur[1] == 0)
1136 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1137 if ((cur[1] & 0xc0) != 0x80)
1138 goto encoding_error;
1139 if ((c & 0xe0) == 0xe0) {
1140 unsigned int val;
1141
1142 if (cur[2] == 0)
1143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1144 if ((cur[2] & 0xc0) != 0x80)
1145 goto encoding_error;
1146 if ((c & 0xf0) == 0xf0) {
1147 if (cur[3] == 0)
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if (((c & 0xf8) != 0xf0) ||
1150 ((cur[3] & 0xc0) != 0x80))
1151 goto encoding_error;
1152 /* 4-byte code */
1153 ctxt->input->cur += 4;
1154 val = (cur[0] & 0x7) << 18;
1155 val |= (cur[1] & 0x3f) << 12;
1156 val |= (cur[2] & 0x3f) << 6;
1157 val |= cur[3] & 0x3f;
1158 } else {
1159 /* 3-byte code */
1160 ctxt->input->cur += 3;
1161 val = (cur[0] & 0xf) << 12;
1162 val |= (cur[1] & 0x3f) << 6;
1163 val |= cur[2] & 0x3f;
1164 }
1165 if (((val > 0xd7ff) && (val < 0xe000)) ||
1166 ((val > 0xfffd) && (val < 0x10000)) ||
1167 (val >= 0x110000)) {
1168 if ((ctxt->sax != NULL) &&
1169 (ctxt->sax->error != NULL))
1170 ctxt->sax->error(ctxt->userData,
1171 "Char 0x%X out of allowed range\n", val);
1172 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1173 ctxt->wellFormed = 0;
1174 ctxt->disableSAX = 1;
1175 }
1176 } else
1177 /* 2-byte code */
1178 ctxt->input->cur += 2;
1179 } else
1180 /* 1-byte code */
1181 ctxt->input->cur++;
1182 } else {
1183 /*
1184 * Assume it's a fixed lenght encoding (1) with
1185 * a compatibke encoding for the ASCII set, since
1186 * XML constructs only use < 128 chars
1187 */
1188 ctxt->input->cur++;
1189 }
1190 ctxt->nbChars++;
1191 if (*ctxt->input->cur == 0)
1192 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1193 }
1194 } else {
1195 ctxt->input->cur++;
1196 ctxt->nbChars++;
1197 if (*ctxt->input->cur == 0)
1198 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1199 }
1200 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1201 xmlParserHandlePEReference(ctxt);
1202 if ((*ctxt->input->cur == 0) &&
1203 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1204 xmlPopInput(ctxt);
1205 return;
1206encoding_error:
1207 /*
1208 * If we detect an UTF8 error that probably mean that the
1209 * input encoding didn't get properly advertized in the
1210 * declaration header. Report the error and switch the encoding
1211 * to ISO-Latin-1 (if you don't like this policy, just declare the
1212 * encoding !)
1213 */
1214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1215 ctxt->sax->error(ctxt->userData,
1216 "Input is not proper UTF-8, indicate encoding !\n");
1217 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1218 ctxt->input->cur[0], ctxt->input->cur[1],
1219 ctxt->input->cur[2], ctxt->input->cur[3]);
1220 }
1221 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1222
1223 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1224 ctxt->input->cur++;
1225 return;
1226}
1227
1228/**
1229 * xmlCurrentChar:
1230 * @ctxt: the XML parser context
1231 * @len: pointer to the length of the char read
1232 *
1233 * The current char value, if using UTF-8 this may actaully span multiple
1234 * bytes in the input buffer. Implement the end of line normalization:
1235 * 2.11 End-of-Line Handling
1236 * Wherever an external parsed entity or the literal entity value
1237 * of an internal parsed entity contains either the literal two-character
1238 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1239 * must pass to the application the single character #xA.
1240 * This behavior can conveniently be produced by normalizing all
1241 * line breaks to #xA on input, before parsing.)
1242 *
1243 * Returns the current char value and its lenght
1244 */
1245
1246int
1247xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1248 if (ctxt->instate == XML_PARSER_EOF)
1249 return(0);
1250
1251 if (ctxt->token != 0) {
1252 *len = 0;
1253 return(ctxt->token);
1254 }
1255 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1256 *len = 1;
1257 return((int) *ctxt->input->cur);
1258 }
1259 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1260 /*
1261 * We are supposed to handle UTF8, check it's valid
1262 * From rfc2044: encoding of the Unicode values on UTF-8:
1263 *
1264 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1265 * 0000 0000-0000 007F 0xxxxxxx
1266 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1267 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1268 *
1269 * Check for the 0x110000 limit too
1270 */
1271 const unsigned char *cur = ctxt->input->cur;
1272 unsigned char c;
1273 unsigned int val;
1274
1275 c = *cur;
1276 if (c & 0x80) {
1277 if (cur[1] == 0)
1278 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1279 if ((cur[1] & 0xc0) != 0x80)
1280 goto encoding_error;
1281 if ((c & 0xe0) == 0xe0) {
1282
1283 if (cur[2] == 0)
1284 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 if ((cur[2] & 0xc0) != 0x80)
1286 goto encoding_error;
1287 if ((c & 0xf0) == 0xf0) {
1288 if (cur[3] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if (((c & 0xf8) != 0xf0) ||
1291 ((cur[3] & 0xc0) != 0x80))
1292 goto encoding_error;
1293 /* 4-byte code */
1294 *len = 4;
1295 val = (cur[0] & 0x7) << 18;
1296 val |= (cur[1] & 0x3f) << 12;
1297 val |= (cur[2] & 0x3f) << 6;
1298 val |= cur[3] & 0x3f;
1299 } else {
1300 /* 3-byte code */
1301 *len = 3;
1302 val = (cur[0] & 0xf) << 12;
1303 val |= (cur[1] & 0x3f) << 6;
1304 val |= cur[2] & 0x3f;
1305 }
1306 } else {
1307 /* 2-byte code */
1308 *len = 2;
1309 val = (cur[0] & 0x1f) << 6;
1310 val |= cur[1] & 0x3f;
1311 }
1312 if (!IS_CHAR(val)) {
1313 if ((ctxt->sax != NULL) &&
1314 (ctxt->sax->error != NULL))
1315 ctxt->sax->error(ctxt->userData,
1316 "Char 0x%X out of allowed range\n", val);
1317 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1318 ctxt->wellFormed = 0;
1319 ctxt->disableSAX = 1;
1320 }
1321 return(val);
1322 } else {
1323 /* 1-byte code */
1324 *len = 1;
1325 if (*ctxt->input->cur == 0xD) {
1326 if (ctxt->input->cur[1] == 0xA) {
1327 ctxt->nbChars++;
1328 ctxt->input->cur++;
1329 }
1330 return(0xA);
1331 }
1332 return((int) *ctxt->input->cur);
1333 }
1334 }
1335 /*
1336 * Assume it's a fixed lenght encoding (1) with
1337 * a compatibke encoding for the ASCII set, since
1338 * XML constructs only use < 128 chars
1339 */
1340 *len = 1;
1341 if (*ctxt->input->cur == 0xD) {
1342 if (ctxt->input->cur[1] == 0xA) {
1343 ctxt->nbChars++;
1344 ctxt->input->cur++;
1345 }
1346 return(0xA);
1347 }
1348 return((int) *ctxt->input->cur);
1349encoding_error:
1350 /*
1351 * If we detect an UTF8 error that probably mean that the
1352 * input encoding didn't get properly advertized in the
1353 * declaration header. Report the error and switch the encoding
1354 * to ISO-Latin-1 (if you don't like this policy, just declare the
1355 * encoding !)
1356 */
1357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1358 ctxt->sax->error(ctxt->userData,
1359 "Input is not proper UTF-8, indicate encoding !\n");
1360 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1361 ctxt->input->cur[0], ctxt->input->cur[1],
1362 ctxt->input->cur[2], ctxt->input->cur[3]);
1363 }
1364 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1365
1366 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1367 *len = 1;
1368 return((int) *ctxt->input->cur);
1369}
1370
1371/**
1372 * xmlStringCurrentChar:
1373 * @ctxt: the XML parser context
1374 * @cur: pointer to the beginning of the char
1375 * @len: pointer to the length of the char read
1376 *
1377 * The current char value, if using UTF-8 this may actaully span multiple
1378 * bytes in the input buffer.
1379 *
1380 * Returns the current char value and its lenght
1381 */
1382
1383int
1384xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001385 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001386 /*
1387 * We are supposed to handle UTF8, check it's valid
1388 * From rfc2044: encoding of the Unicode values on UTF-8:
1389 *
1390 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1391 * 0000 0000-0000 007F 0xxxxxxx
1392 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1393 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1394 *
1395 * Check for the 0x110000 limit too
1396 */
1397 unsigned char c;
1398 unsigned int val;
1399
1400 c = *cur;
1401 if (c & 0x80) {
1402 if ((cur[1] & 0xc0) != 0x80)
1403 goto encoding_error;
1404 if ((c & 0xe0) == 0xe0) {
1405
1406 if ((cur[2] & 0xc0) != 0x80)
1407 goto encoding_error;
1408 if ((c & 0xf0) == 0xf0) {
1409 if (((c & 0xf8) != 0xf0) ||
1410 ((cur[3] & 0xc0) != 0x80))
1411 goto encoding_error;
1412 /* 4-byte code */
1413 *len = 4;
1414 val = (cur[0] & 0x7) << 18;
1415 val |= (cur[1] & 0x3f) << 12;
1416 val |= (cur[2] & 0x3f) << 6;
1417 val |= cur[3] & 0x3f;
1418 } else {
1419 /* 3-byte code */
1420 *len = 3;
1421 val = (cur[0] & 0xf) << 12;
1422 val |= (cur[1] & 0x3f) << 6;
1423 val |= cur[2] & 0x3f;
1424 }
1425 } else {
1426 /* 2-byte code */
1427 *len = 2;
1428 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001429 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001430 }
1431 if (!IS_CHAR(val)) {
1432 if ((ctxt->sax != NULL) &&
1433 (ctxt->sax->error != NULL))
1434 ctxt->sax->error(ctxt->userData,
1435 "Char 0x%X out of allowed range\n", val);
1436 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1437 ctxt->wellFormed = 0;
1438 ctxt->disableSAX = 1;
1439 }
1440 return(val);
1441 } else {
1442 /* 1-byte code */
1443 *len = 1;
1444 return((int) *cur);
1445 }
1446 }
1447 /*
1448 * Assume it's a fixed lenght encoding (1) with
1449 * a compatibke encoding for the ASCII set, since
1450 * XML constructs only use < 128 chars
1451 */
1452 *len = 1;
1453 return((int) *cur);
1454encoding_error:
1455 /*
1456 * If we detect an UTF8 error that probably mean that the
1457 * input encoding didn't get properly advertized in the
1458 * declaration header. Report the error and switch the encoding
1459 * to ISO-Latin-1 (if you don't like this policy, just declare the
1460 * encoding !)
1461 */
1462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1463 ctxt->sax->error(ctxt->userData,
1464 "Input is not proper UTF-8, indicate encoding !\n");
1465 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1466 ctxt->input->cur[0], ctxt->input->cur[1],
1467 ctxt->input->cur[2], ctxt->input->cur[3]);
1468 }
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470
1471 *len = 1;
1472 return((int) *cur);
1473}
1474
1475/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001476 * xmlCopyCharMultiByte:
1477 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * @val: the char value
1479 *
1480 * append the char value in the array
1481 *
1482 * Returns the number of xmlChar written
1483 */
Owen Taylor3473f882001-02-23 17:55:21 +00001484int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001485xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001486 /*
1487 * We are supposed to handle UTF8, check it's valid
1488 * From rfc2044: encoding of the Unicode values on UTF-8:
1489 *
1490 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1491 * 0000 0000-0000 007F 0xxxxxxx
1492 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1493 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495 if (val >= 0x80) {
1496 xmlChar *savedout = out;
1497 int bits;
1498 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1499 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1500 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1501 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001502 xmlGenericError(xmlGenericErrorContext,
1503 "Internal error, xmlCopyChar 0x%X out of bound\n",
1504 val);
1505 return(0);
1506 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507 for ( ; bits >= 0; bits-= 6)
1508 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1509 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001510 }
1511 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001512 return 1;
1513}
1514
1515/**
1516 * xmlCopyChar:
1517 * @len: Ignored, compatibility
1518 * @out: pointer to an arry of xmlChar
1519 * @val: the char value
1520 *
1521 * append the char value in the array
1522 *
1523 * Returns the number of xmlChar written
1524 */
1525
1526int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001527xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 /* the len parameter is ignored */
1529 if (val >= 0x80) {
1530 return(xmlCopyCharMultiByte (out, val));
1531 }
1532 *out = (xmlChar) val;
1533 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001534}
1535
1536/************************************************************************
1537 * *
1538 * Commodity functions to switch encodings *
1539 * *
1540 ************************************************************************/
1541
1542/**
1543 * xmlSwitchEncoding:
1544 * @ctxt: the parser context
1545 * @enc: the encoding value (number)
1546 *
1547 * change the input functions when discovering the character encoding
1548 * of a given entity.
1549 *
1550 * Returns 0 in case of success, -1 otherwise
1551 */
1552int
1553xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1554{
1555 xmlCharEncodingHandlerPtr handler;
1556
1557 switch (enc) {
1558 case XML_CHAR_ENCODING_ERROR:
1559 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1562 ctxt->wellFormed = 0;
1563 ctxt->disableSAX = 1;
1564 break;
1565 case XML_CHAR_ENCODING_NONE:
1566 /* let's assume it's UTF-8 without the XML decl */
1567 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1568 return(0);
1569 case XML_CHAR_ENCODING_UTF8:
1570 /* default encoding, no conversion should be needed */
1571 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001572
1573 /*
1574 * Errata on XML-1.0 June 20 2001
1575 * Specific handling of the Byte Order Mark for
1576 * UTF-8
1577 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001578 if ((ctxt->input != NULL) &&
1579 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001580 (ctxt->input->cur[1] == 0xBB) &&
1581 (ctxt->input->cur[2] == 0xBF)) {
1582 ctxt->input->cur += 3;
1583 }
Owen Taylor3473f882001-02-23 17:55:21 +00001584 return(0);
1585 default:
1586 break;
1587 }
1588 handler = xmlGetCharEncodingHandler(enc);
1589 if (handler == NULL) {
1590 /*
1591 * Default handlers.
1592 */
1593 switch (enc) {
1594 case XML_CHAR_ENCODING_ERROR:
1595 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1597 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1598 ctxt->wellFormed = 0;
1599 ctxt->disableSAX = 1;
1600 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1601 break;
1602 case XML_CHAR_ENCODING_NONE:
1603 /* let's assume it's UTF-8 without the XML decl */
1604 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1605 return(0);
1606 case XML_CHAR_ENCODING_UTF8:
1607 case XML_CHAR_ENCODING_ASCII:
1608 /* default encoding, no conversion should be needed */
1609 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1610 return(0);
1611 case XML_CHAR_ENCODING_UTF16LE:
1612 break;
1613 case XML_CHAR_ENCODING_UTF16BE:
1614 break;
1615 case XML_CHAR_ENCODING_UCS4LE:
1616 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1618 ctxt->sax->error(ctxt->userData,
1619 "char encoding USC4 little endian not supported\n");
1620 break;
1621 case XML_CHAR_ENCODING_UCS4BE:
1622 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624 ctxt->sax->error(ctxt->userData,
1625 "char encoding USC4 big endian not supported\n");
1626 break;
1627 case XML_CHAR_ENCODING_EBCDIC:
1628 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1630 ctxt->sax->error(ctxt->userData,
1631 "char encoding EBCDIC not supported\n");
1632 break;
1633 case XML_CHAR_ENCODING_UCS4_2143:
1634 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636 ctxt->sax->error(ctxt->userData,
1637 "char encoding UCS4 2143 not supported\n");
1638 break;
1639 case XML_CHAR_ENCODING_UCS4_3412:
1640 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642 ctxt->sax->error(ctxt->userData,
1643 "char encoding UCS4 3412 not supported\n");
1644 break;
1645 case XML_CHAR_ENCODING_UCS2:
1646 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData,
1649 "char encoding UCS2 not supported\n");
1650 break;
1651 case XML_CHAR_ENCODING_8859_1:
1652 case XML_CHAR_ENCODING_8859_2:
1653 case XML_CHAR_ENCODING_8859_3:
1654 case XML_CHAR_ENCODING_8859_4:
1655 case XML_CHAR_ENCODING_8859_5:
1656 case XML_CHAR_ENCODING_8859_6:
1657 case XML_CHAR_ENCODING_8859_7:
1658 case XML_CHAR_ENCODING_8859_8:
1659 case XML_CHAR_ENCODING_8859_9:
1660 /*
1661 * We used to keep the internal content in the
1662 * document encoding however this turns being unmaintainable
1663 * So xmlGetCharEncodingHandler() will return non-null
1664 * values for this now.
1665 */
1666 if ((ctxt->inputNr == 1) &&
1667 (ctxt->encoding == NULL) &&
1668 (ctxt->input->encoding != NULL)) {
1669 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1670 }
1671 ctxt->charset = enc;
1672 return(0);
1673 case XML_CHAR_ENCODING_2022_JP:
1674 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1676 ctxt->sax->error(ctxt->userData,
1677 "char encoding ISO-2022-JPnot supported\n");
1678 break;
1679 case XML_CHAR_ENCODING_SHIFT_JIS:
1680 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1682 ctxt->sax->error(ctxt->userData,
1683 "char encoding Shift_JIS not supported\n");
1684 break;
1685 case XML_CHAR_ENCODING_EUC_JP:
1686 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1688 ctxt->sax->error(ctxt->userData,
1689 "char encoding EUC-JPnot supported\n");
1690 break;
1691 }
1692 }
1693 if (handler == NULL)
1694 return(-1);
1695 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1696 return(xmlSwitchToEncoding(ctxt, handler));
1697}
1698
1699/**
1700 * xmlSwitchToEncoding:
1701 * @ctxt: the parser context
1702 * @handler: the encoding handler
1703 *
1704 * change the input functions when discovering the character encoding
1705 * of a given entity.
1706 *
1707 * Returns 0 in case of success, -1 otherwise
1708 */
1709int
1710xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1711{
1712 int nbchars;
1713
1714 if (handler != NULL) {
1715 if (ctxt->input != NULL) {
1716 if (ctxt->input->buf != NULL) {
1717 if (ctxt->input->buf->encoder != NULL) {
1718 if (ctxt->input->buf->encoder == handler)
1719 return(0);
1720 /*
1721 * Note: this is a bit dangerous, but that's what it
1722 * takes to use nearly compatible signature for different
1723 * encodings.
1724 */
1725 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1726 ctxt->input->buf->encoder = handler;
1727 return(0);
1728 }
1729 ctxt->input->buf->encoder = handler;
1730
1731 /*
1732 * Is there already some content down the pipe to convert ?
1733 */
1734 if ((ctxt->input->buf->buffer != NULL) &&
1735 (ctxt->input->buf->buffer->use > 0)) {
1736 int processed;
1737
1738 /*
1739 * Specific handling of the Byte Order Mark for
1740 * UTF-16
1741 */
1742 if ((handler->name != NULL) &&
1743 (!strcmp(handler->name, "UTF-16LE")) &&
1744 (ctxt->input->cur[0] == 0xFF) &&
1745 (ctxt->input->cur[1] == 0xFE)) {
1746 ctxt->input->cur += 2;
1747 }
1748 if ((handler->name != NULL) &&
1749 (!strcmp(handler->name, "UTF-16BE")) &&
1750 (ctxt->input->cur[0] == 0xFE) &&
1751 (ctxt->input->cur[1] == 0xFF)) {
1752 ctxt->input->cur += 2;
1753 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001754 /*
1755 * Errata on XML-1.0 June 20 2001
1756 * Specific handling of the Byte Order Mark for
1757 * UTF-8
1758 */
1759 if ((handler->name != NULL) &&
1760 (!strcmp(handler->name, "UTF-8")) &&
1761 (ctxt->input->cur[0] == 0xEF) &&
1762 (ctxt->input->cur[1] == 0xBB) &&
1763 (ctxt->input->cur[1] == 0xBF)) {
1764 ctxt->input->cur += 3;
1765 }
Owen Taylor3473f882001-02-23 17:55:21 +00001766
1767 /*
1768 * Shring the current input buffer.
1769 * Move it as the raw buffer and create a new input buffer
1770 */
1771 processed = ctxt->input->cur - ctxt->input->base;
1772 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1773 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1774 ctxt->input->buf->buffer = xmlBufferCreate();
1775
1776 if (ctxt->html) {
1777 /*
1778 * converst as much as possbile of the buffer
1779 */
1780 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1781 ctxt->input->buf->buffer,
1782 ctxt->input->buf->raw);
1783 } else {
1784 /*
1785 * convert just enough to get
1786 * '<?xml version="1.0" encoding="xxx"?>'
1787 * parsed with the autodetected encoding
1788 * into the parser reading buffer.
1789 */
1790 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1791 ctxt->input->buf->buffer,
1792 ctxt->input->buf->raw);
1793 }
1794 if (nbchars < 0) {
1795 xmlGenericError(xmlGenericErrorContext,
1796 "xmlSwitchToEncoding: encoder error\n");
1797 return(-1);
1798 }
1799 ctxt->input->base =
1800 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001801 ctxt->input->end =
1802 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001803
1804 }
1805 return(0);
1806 } else {
1807 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1808 /*
1809 * When parsing a static memory array one must know the
1810 * size to be able to convert the buffer.
1811 */
1812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1813 ctxt->sax->error(ctxt->userData,
1814 "xmlSwitchEncoding : no input\n");
1815 return(-1);
1816 } else {
1817 int processed;
1818
1819 /*
1820 * Shring the current input buffer.
1821 * Move it as the raw buffer and create a new input buffer
1822 */
1823 processed = ctxt->input->cur - ctxt->input->base;
1824
1825 ctxt->input->buf->raw = xmlBufferCreate();
1826 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1827 ctxt->input->length - processed);
1828 ctxt->input->buf->buffer = xmlBufferCreate();
1829
1830 /*
1831 * convert as much as possible of the raw input
1832 * to the parser reading buffer.
1833 */
1834 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1835 ctxt->input->buf->buffer,
1836 ctxt->input->buf->raw);
1837 if (nbchars < 0) {
1838 xmlGenericError(xmlGenericErrorContext,
1839 "xmlSwitchToEncoding: encoder error\n");
1840 return(-1);
1841 }
1842
1843 /*
1844 * Conversion succeeded, get rid of the old buffer
1845 */
1846 if ((ctxt->input->free != NULL) &&
1847 (ctxt->input->base != NULL))
1848 ctxt->input->free((xmlChar *) ctxt->input->base);
1849 ctxt->input->base =
1850 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001851 ctxt->input->end =
1852 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001853 }
1854 }
1855 } else {
1856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1857 ctxt->sax->error(ctxt->userData,
1858 "xmlSwitchEncoding : no input\n");
1859 return(-1);
1860 }
1861 /*
1862 * The parsing is now done in UTF8 natively
1863 */
1864 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1865 } else
1866 return(-1);
1867 return(0);
1868
1869}
1870
1871/************************************************************************
1872 * *
1873 * Commodity functions to handle entities processing *
1874 * *
1875 ************************************************************************/
1876
1877/**
1878 * xmlFreeInputStream:
1879 * @input: an xmlParserInputPtr
1880 *
1881 * Free up an input stream.
1882 */
1883void
1884xmlFreeInputStream(xmlParserInputPtr input) {
1885 if (input == NULL) return;
1886
1887 if (input->filename != NULL) xmlFree((char *) input->filename);
1888 if (input->directory != NULL) xmlFree((char *) input->directory);
1889 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1890 if (input->version != NULL) xmlFree((char *) input->version);
1891 if ((input->free != NULL) && (input->base != NULL))
1892 input->free((xmlChar *) input->base);
1893 if (input->buf != NULL)
1894 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001895 xmlFree(input);
1896}
1897
1898/**
1899 * xmlNewInputStream:
1900 * @ctxt: an XML parser context
1901 *
1902 * Create a new input stream structure
1903 * Returns the new input stream or NULL
1904 */
1905xmlParserInputPtr
1906xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1907 xmlParserInputPtr input;
1908
1909 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1910 if (input == NULL) {
1911 if (ctxt != NULL) {
1912 ctxt->errNo = XML_ERR_NO_MEMORY;
1913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1914 ctxt->sax->error(ctxt->userData,
1915 "malloc: couldn't allocate a new input stream\n");
1916 ctxt->errNo = XML_ERR_NO_MEMORY;
1917 }
1918 return(NULL);
1919 }
1920 memset(input, 0, sizeof(xmlParserInput));
1921 input->line = 1;
1922 input->col = 1;
1923 input->standalone = -1;
1924 return(input);
1925}
1926
1927/**
1928 * xmlNewIOInputStream:
1929 * @ctxt: an XML parser context
1930 * @input: an I/O Input
1931 * @enc: the charset encoding if known
1932 *
1933 * Create a new input stream structure encapsulating the @input into
1934 * a stream suitable for the parser.
1935 *
1936 * Returns the new input stream or NULL
1937 */
1938xmlParserInputPtr
1939xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1940 xmlCharEncoding enc) {
1941 xmlParserInputPtr inputStream;
1942
1943 if (xmlParserDebugEntities)
1944 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1945 inputStream = xmlNewInputStream(ctxt);
1946 if (inputStream == NULL) {
1947 return(NULL);
1948 }
1949 inputStream->filename = NULL;
1950 inputStream->buf = input;
1951 inputStream->base = inputStream->buf->buffer->content;
1952 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001953 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001954 if (enc != XML_CHAR_ENCODING_NONE) {
1955 xmlSwitchEncoding(ctxt, enc);
1956 }
1957
1958 return(inputStream);
1959}
1960
1961/**
1962 * xmlNewEntityInputStream:
1963 * @ctxt: an XML parser context
1964 * @entity: an Entity pointer
1965 *
1966 * Create a new input stream based on an xmlEntityPtr
1967 *
1968 * Returns the new input stream or NULL
1969 */
1970xmlParserInputPtr
1971xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1972 xmlParserInputPtr input;
1973
1974 if (entity == NULL) {
1975 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1977 ctxt->sax->error(ctxt->userData,
1978 "internal: xmlNewEntityInputStream entity = NULL\n");
1979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1980 return(NULL);
1981 }
1982 if (xmlParserDebugEntities)
1983 xmlGenericError(xmlGenericErrorContext,
1984 "new input from entity: %s\n", entity->name);
1985 if (entity->content == NULL) {
1986 switch (entity->etype) {
1987 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1988 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1990 ctxt->sax->error(ctxt->userData,
1991 "xmlNewEntityInputStream unparsed entity !\n");
1992 break;
1993 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1994 case XML_EXTERNAL_PARAMETER_ENTITY:
1995 return(xmlLoadExternalEntity((char *) entity->URI,
1996 (char *) entity->ExternalID, ctxt));
1997 case XML_INTERNAL_GENERAL_ENTITY:
1998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1999 ctxt->sax->error(ctxt->userData,
2000 "Internal entity %s without content !\n", entity->name);
2001 break;
2002 case XML_INTERNAL_PARAMETER_ENTITY:
2003 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "Internal parameter entity %s without content !\n", entity->name);
2007 break;
2008 case XML_INTERNAL_PREDEFINED_ENTITY:
2009 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2011 ctxt->sax->error(ctxt->userData,
2012 "Predefined entity %s without content !\n", entity->name);
2013 break;
2014 }
2015 return(NULL);
2016 }
2017 input = xmlNewInputStream(ctxt);
2018 if (input == NULL) {
2019 return(NULL);
2020 }
2021 input->filename = (char *) entity->URI;
2022 input->base = entity->content;
2023 input->cur = entity->content;
2024 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002025 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002026 return(input);
2027}
2028
2029/**
2030 * xmlNewStringInputStream:
2031 * @ctxt: an XML parser context
2032 * @buffer: an memory buffer
2033 *
2034 * Create a new input stream based on a memory buffer.
2035 * Returns the new input stream
2036 */
2037xmlParserInputPtr
2038xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2039 xmlParserInputPtr input;
2040
2041 if (buffer == NULL) {
2042 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2044 ctxt->sax->error(ctxt->userData,
2045 "internal: xmlNewStringInputStream string = NULL\n");
2046 return(NULL);
2047 }
2048 if (xmlParserDebugEntities)
2049 xmlGenericError(xmlGenericErrorContext,
2050 "new fixed input: %.30s\n", buffer);
2051 input = xmlNewInputStream(ctxt);
2052 if (input == NULL) {
2053 return(NULL);
2054 }
2055 input->base = buffer;
2056 input->cur = buffer;
2057 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002058 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002059 return(input);
2060}
2061
2062/**
2063 * xmlNewInputFromFile:
2064 * @ctxt: an XML parser context
2065 * @filename: the filename to use as entity
2066 *
2067 * Create a new input stream based on a file.
2068 *
2069 * Returns the new input stream or NULL in case of error
2070 */
2071xmlParserInputPtr
2072xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2073 xmlParserInputBufferPtr buf;
2074 xmlParserInputPtr inputStream;
2075 char *directory = NULL;
2076 xmlChar *URI = NULL;
2077
2078 if (xmlParserDebugEntities)
2079 xmlGenericError(xmlGenericErrorContext,
2080 "new input from file: %s\n", filename);
2081 if (ctxt == NULL) return(NULL);
2082 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2083 if (buf == NULL)
2084 return(NULL);
2085
2086 URI = xmlStrdup((xmlChar *) filename);
2087 directory = xmlParserGetDirectory((const char *) URI);
2088
2089 inputStream = xmlNewInputStream(ctxt);
2090 if (inputStream == NULL) {
2091 if (directory != NULL) xmlFree((char *) directory);
2092 if (URI != NULL) xmlFree((char *) URI);
2093 return(NULL);
2094 }
2095
2096 inputStream->filename = (const char *) URI;
2097 inputStream->directory = directory;
2098 inputStream->buf = buf;
2099
2100 inputStream->base = inputStream->buf->buffer->content;
2101 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002102 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002103 if ((ctxt->directory == NULL) && (directory != NULL))
2104 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2105 return(inputStream);
2106}
2107
2108/************************************************************************
2109 * *
2110 * Commodity functions to handle parser contexts *
2111 * *
2112 ************************************************************************/
2113
2114/**
2115 * xmlInitParserCtxt:
2116 * @ctxt: an XML parser context
2117 *
2118 * Initialize a parser context
2119 */
2120
2121void
2122xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2123{
2124 xmlSAXHandler *sax;
2125
2126 xmlDefaultSAXHandlerInit();
2127
2128 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2129 if (sax == NULL) {
2130 xmlGenericError(xmlGenericErrorContext,
2131 "xmlInitParserCtxt: out of memory\n");
2132 }
2133 else
2134 memset(sax, 0, sizeof(xmlSAXHandler));
2135
2136 /* Allocate the Input stack */
2137 ctxt->inputTab = (xmlParserInputPtr *)
2138 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2139 if (ctxt->inputTab == NULL) {
2140 xmlGenericError(xmlGenericErrorContext,
2141 "xmlInitParserCtxt: out of memory\n");
2142 ctxt->inputNr = 0;
2143 ctxt->inputMax = 0;
2144 ctxt->input = NULL;
2145 return;
2146 }
2147 ctxt->inputNr = 0;
2148 ctxt->inputMax = 5;
2149 ctxt->input = NULL;
2150
2151 ctxt->version = NULL;
2152 ctxt->encoding = NULL;
2153 ctxt->standalone = -1;
2154 ctxt->hasExternalSubset = 0;
2155 ctxt->hasPErefs = 0;
2156 ctxt->html = 0;
2157 ctxt->external = 0;
2158 ctxt->instate = XML_PARSER_START;
2159 ctxt->token = 0;
2160 ctxt->directory = NULL;
2161
2162 /* Allocate the Node stack */
2163 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2164 if (ctxt->nodeTab == NULL) {
2165 xmlGenericError(xmlGenericErrorContext,
2166 "xmlInitParserCtxt: out of memory\n");
2167 ctxt->nodeNr = 0;
2168 ctxt->nodeMax = 0;
2169 ctxt->node = NULL;
2170 ctxt->inputNr = 0;
2171 ctxt->inputMax = 0;
2172 ctxt->input = NULL;
2173 return;
2174 }
2175 ctxt->nodeNr = 0;
2176 ctxt->nodeMax = 10;
2177 ctxt->node = NULL;
2178
2179 /* Allocate the Name stack */
2180 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2181 if (ctxt->nameTab == NULL) {
2182 xmlGenericError(xmlGenericErrorContext,
2183 "xmlInitParserCtxt: out of memory\n");
2184 ctxt->nodeNr = 0;
2185 ctxt->nodeMax = 0;
2186 ctxt->node = NULL;
2187 ctxt->inputNr = 0;
2188 ctxt->inputMax = 0;
2189 ctxt->input = NULL;
2190 ctxt->nameNr = 0;
2191 ctxt->nameMax = 0;
2192 ctxt->name = NULL;
2193 return;
2194 }
2195 ctxt->nameNr = 0;
2196 ctxt->nameMax = 10;
2197 ctxt->name = NULL;
2198
2199 /* Allocate the space stack */
2200 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2201 if (ctxt->spaceTab == NULL) {
2202 xmlGenericError(xmlGenericErrorContext,
2203 "xmlInitParserCtxt: out of memory\n");
2204 ctxt->nodeNr = 0;
2205 ctxt->nodeMax = 0;
2206 ctxt->node = NULL;
2207 ctxt->inputNr = 0;
2208 ctxt->inputMax = 0;
2209 ctxt->input = NULL;
2210 ctxt->nameNr = 0;
2211 ctxt->nameMax = 0;
2212 ctxt->name = NULL;
2213 ctxt->spaceNr = 0;
2214 ctxt->spaceMax = 0;
2215 ctxt->space = NULL;
2216 return;
2217 }
2218 ctxt->spaceNr = 1;
2219 ctxt->spaceMax = 10;
2220 ctxt->spaceTab[0] = -1;
2221 ctxt->space = &ctxt->spaceTab[0];
2222
Daniel Veillard14be0a12001-03-03 18:50:55 +00002223 ctxt->sax = sax;
2224 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2225
Owen Taylor3473f882001-02-23 17:55:21 +00002226 ctxt->userData = ctxt;
2227 ctxt->myDoc = NULL;
2228 ctxt->wellFormed = 1;
2229 ctxt->valid = 1;
2230 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2231 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2232 ctxt->pedantic = xmlPedanticParserDefaultValue;
2233 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2234 ctxt->vctxt.userData = ctxt;
2235 if (ctxt->validate) {
2236 ctxt->vctxt.error = xmlParserValidityError;
2237 if (xmlGetWarningsDefaultValue == 0)
2238 ctxt->vctxt.warning = NULL;
2239 else
2240 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002241 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002242 } else {
2243 ctxt->vctxt.error = NULL;
2244 ctxt->vctxt.warning = NULL;
2245 }
2246 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2247 ctxt->record_info = 0;
2248 ctxt->nbChars = 0;
2249 ctxt->checkIndex = 0;
2250 ctxt->inSubset = 0;
2251 ctxt->errNo = XML_ERR_OK;
2252 ctxt->depth = 0;
2253 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2254 xmlInitNodeInfoSeq(&ctxt->node_seq);
2255}
2256
2257/**
2258 * xmlFreeParserCtxt:
2259 * @ctxt: an XML parser context
2260 *
2261 * Free all the memory used by a parser context. However the parsed
2262 * document in ctxt->myDoc is not freed.
2263 */
2264
2265void
2266xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2267{
2268 xmlParserInputPtr input;
2269 xmlChar *oldname;
2270
2271 if (ctxt == NULL) return;
2272
2273 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2274 xmlFreeInputStream(input);
2275 }
2276 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2277 xmlFree(oldname);
2278 }
2279 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2280 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2281 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2282 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2283 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2284 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2285 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2286 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2287 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002288 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2289 xmlFree(ctxt->sax);
2290 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002291 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Owen Taylor3473f882001-02-23 17:55:21 +00002292 xmlFree(ctxt);
2293}
2294
2295/**
2296 * xmlNewParserCtxt:
2297 *
2298 * Allocate and initialize a new parser context.
2299 *
2300 * Returns the xmlParserCtxtPtr or NULL
2301 */
2302
2303xmlParserCtxtPtr
2304xmlNewParserCtxt()
2305{
2306 xmlParserCtxtPtr ctxt;
2307
2308 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2309 if (ctxt == NULL) {
2310 xmlGenericError(xmlGenericErrorContext,
2311 "xmlNewParserCtxt : cannot allocate context\n");
2312 perror("malloc");
2313 return(NULL);
2314 }
2315 memset(ctxt, 0, sizeof(xmlParserCtxt));
2316 xmlInitParserCtxt(ctxt);
2317 return(ctxt);
2318}
2319
2320/************************************************************************
2321 * *
2322 * Handling of node informations *
2323 * *
2324 ************************************************************************/
2325
2326/**
2327 * xmlClearParserCtxt:
2328 * @ctxt: an XML parser context
2329 *
2330 * Clear (release owned resources) and reinitialize a parser context
2331 */
2332
2333void
2334xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2335{
2336 xmlClearNodeInfoSeq(&ctxt->node_seq);
2337 xmlInitParserCtxt(ctxt);
2338}
2339
2340/**
2341 * xmlParserFindNodeInfo:
2342 * @ctxt: an XML parser context
2343 * @node: an XML node within the tree
2344 *
2345 * Find the parser node info struct for a given node
2346 *
2347 * Returns an xmlParserNodeInfo block pointer or NULL
2348 */
2349const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2350 const xmlNode* node)
2351{
2352 unsigned long pos;
2353
2354 /* Find position where node should be at */
2355 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2356 if ( ctx->node_seq.buffer[pos].node == node )
2357 return &ctx->node_seq.buffer[pos];
2358 else
2359 return NULL;
2360}
2361
2362
2363/**
2364 * xmlInitNodeInfoSeq:
2365 * @seq: a node info sequence pointer
2366 *
2367 * -- Initialize (set to initial state) node info sequence
2368 */
2369void
2370xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2371{
2372 seq->length = 0;
2373 seq->maximum = 0;
2374 seq->buffer = NULL;
2375}
2376
2377/**
2378 * xmlClearNodeInfoSeq:
2379 * @seq: a node info sequence pointer
2380 *
2381 * -- Clear (release memory and reinitialize) node
2382 * info sequence
2383 */
2384void
2385xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2386{
2387 if ( seq->buffer != NULL )
2388 xmlFree(seq->buffer);
2389 xmlInitNodeInfoSeq(seq);
2390}
2391
2392
2393/**
2394 * xmlParserFindNodeInfoIndex:
2395 * @seq: a node info sequence pointer
2396 * @node: an XML node pointer
2397 *
2398 *
2399 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2400 * the given node is or should be at in a sorted sequence
2401 *
2402 * Returns a long indicating the position of the record
2403 */
2404unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2405 const xmlNode* node)
2406{
2407 unsigned long upper, lower, middle;
2408 int found = 0;
2409
2410 /* Do a binary search for the key */
2411 lower = 1;
2412 upper = seq->length;
2413 middle = 0;
2414 while ( lower <= upper && !found) {
2415 middle = lower + (upper - lower) / 2;
2416 if ( node == seq->buffer[middle - 1].node )
2417 found = 1;
2418 else if ( node < seq->buffer[middle - 1].node )
2419 upper = middle - 1;
2420 else
2421 lower = middle + 1;
2422 }
2423
2424 /* Return position */
2425 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2426 return middle;
2427 else
2428 return middle - 1;
2429}
2430
2431
2432/**
2433 * xmlParserAddNodeInfo:
2434 * @ctxt: an XML parser context
2435 * @info: a node info sequence pointer
2436 *
2437 * Insert node info record into the sorted sequence
2438 */
2439void
2440xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2441 const xmlParserNodeInfo* info)
2442{
2443 unsigned long pos;
2444 static unsigned int block_size = 5;
2445
2446 /* Find pos and check to see if node is already in the sequence */
2447 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2448 if ( pos < ctxt->node_seq.length
2449 && ctxt->node_seq.buffer[pos].node == info->node ) {
2450 ctxt->node_seq.buffer[pos] = *info;
2451 }
2452
2453 /* Otherwise, we need to add new node to buffer */
2454 else {
2455 /* Expand buffer by 5 if needed */
2456 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2457 xmlParserNodeInfo* tmp_buffer;
2458 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2459 *(ctxt->node_seq.maximum + block_size));
2460
2461 if ( ctxt->node_seq.buffer == NULL )
2462 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2463 else
2464 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2465
2466 if ( tmp_buffer == NULL ) {
2467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2468 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2469 ctxt->errNo = XML_ERR_NO_MEMORY;
2470 return;
2471 }
2472 ctxt->node_seq.buffer = tmp_buffer;
2473 ctxt->node_seq.maximum += block_size;
2474 }
2475
2476 /* If position is not at end, move elements out of the way */
2477 if ( pos != ctxt->node_seq.length ) {
2478 unsigned long i;
2479
2480 for ( i = ctxt->node_seq.length; i > pos; i-- )
2481 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2482 }
2483
2484 /* Copy element and increase length */
2485 ctxt->node_seq.buffer[pos] = *info;
2486 ctxt->node_seq.length++;
2487 }
2488}
2489
2490/************************************************************************
2491 * *
2492 * Deprecated functions kept for compatibility *
2493 * *
2494 ************************************************************************/
2495
2496/*
2497 * xmlCheckLanguageID
2498 * @lang: pointer to the string value
2499 *
2500 * Checks that the value conforms to the LanguageID production:
2501 *
2502 * NOTE: this is somewhat deprecated, those productions were removed from
2503 * the XML Second edition.
2504 *
2505 * [33] LanguageID ::= Langcode ('-' Subcode)*
2506 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2507 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2508 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2509 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2510 * [38] Subcode ::= ([a-z] | [A-Z])+
2511 *
2512 * Returns 1 if correct 0 otherwise
2513 **/
2514int
2515xmlCheckLanguageID(const xmlChar *lang) {
2516 const xmlChar *cur = lang;
2517
2518 if (cur == NULL)
2519 return(0);
2520 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2521 ((cur[0] == 'I') && (cur[1] == '-'))) {
2522 /*
2523 * IANA code
2524 */
2525 cur += 2;
2526 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2527 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2528 cur++;
2529 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2530 ((cur[0] == 'X') && (cur[1] == '-'))) {
2531 /*
2532 * User code
2533 */
2534 cur += 2;
2535 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2536 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2537 cur++;
2538 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2539 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2540 /*
2541 * ISO639
2542 */
2543 cur++;
2544 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2545 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2546 cur++;
2547 else
2548 return(0);
2549 } else
2550 return(0);
2551 while (cur[0] != 0) { /* non input consuming */
2552 if (cur[0] != '-')
2553 return(0);
2554 cur++;
2555 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2556 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2557 cur++;
2558 else
2559 return(0);
2560 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2561 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2562 cur++;
2563 }
2564 return(1);
2565}
2566
2567/**
2568 * xmlDecodeEntities:
2569 * @ctxt: the parser context
2570 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2571 * @len: the len to decode (in bytes !), -1 for no size limit
2572 * @end: an end marker xmlChar, 0 if none
2573 * @end2: an end marker xmlChar, 0 if none
2574 * @end3: an end marker xmlChar, 0 if none
2575 *
2576 * This function is deprecated, we now always process entities content
2577 * through xmlStringDecodeEntities
2578 *
2579 * TODO: remove it in next major release.
2580 *
2581 * [67] Reference ::= EntityRef | CharRef
2582 *
2583 * [69] PEReference ::= '%' Name ';'
2584 *
2585 * Returns A newly allocated string with the substitution done. The caller
2586 * must deallocate it !
2587 */
2588xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002589xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2590 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002591#if 0
2592 xmlChar *buffer = NULL;
2593 unsigned int buffer_size = 0;
2594 unsigned int nbchars = 0;
2595
2596 xmlChar *current = NULL;
2597 xmlEntityPtr ent;
2598 unsigned int max = (unsigned int) len;
2599 int c,l;
2600#endif
2601
2602 static int deprecated = 0;
2603 if (!deprecated) {
2604 xmlGenericError(xmlGenericErrorContext,
2605 "xmlDecodeEntities() deprecated function reached\n");
2606 deprecated = 1;
2607 }
2608
2609#if 0
2610 if (ctxt->depth > 40) {
2611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2612 ctxt->sax->error(ctxt->userData,
2613 "Detected entity reference loop\n");
2614 ctxt->wellFormed = 0;
2615 ctxt->disableSAX = 1;
2616 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2617 return(NULL);
2618 }
2619
2620 /*
2621 * allocate a translation buffer.
2622 */
2623 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2624 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2625 if (buffer == NULL) {
2626 perror("xmlDecodeEntities: malloc failed");
2627 return(NULL);
2628 }
2629
2630 /*
2631 * Ok loop until we reach one of the ending char or a size limit.
2632 */
2633 GROW;
2634 c = CUR_CHAR(l);
2635 while ((nbchars < max) && (c != end) && /* NOTUSED */
2636 (c != end2) && (c != end3)) {
2637 GROW;
2638 if (c == 0) break;
2639 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2640 int val = xmlParseCharRef(ctxt);
2641 COPY_BUF(0,buffer,nbchars,val);
2642 NEXTL(l);
2643 } else if ((c == '&') && (ctxt->token != '&') &&
2644 (what & XML_SUBSTITUTE_REF)) {
2645 if (xmlParserDebugEntities)
2646 xmlGenericError(xmlGenericErrorContext,
2647 "decoding Entity Reference\n");
2648 ent = xmlParseEntityRef(ctxt);
2649 if ((ent != NULL) &&
2650 (ctxt->replaceEntities != 0)) {
2651 current = ent->content;
2652 while (*current != 0) { /* non input consuming loop */
2653 buffer[nbchars++] = *current++;
2654 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2655 growBuffer(buffer);
2656 }
2657 }
2658 } else if (ent != NULL) {
2659 const xmlChar *cur = ent->name;
2660
2661 buffer[nbchars++] = '&';
2662 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2663 growBuffer(buffer);
2664 }
2665 while (*cur != 0) { /* non input consuming loop */
2666 buffer[nbchars++] = *cur++;
2667 }
2668 buffer[nbchars++] = ';';
2669 }
2670 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2671 /*
2672 * a PEReference induce to switch the entity flow,
2673 * we break here to flush the current set of chars
2674 * parsed if any. We will be called back later.
2675 */
2676 if (xmlParserDebugEntities)
2677 xmlGenericError(xmlGenericErrorContext,
2678 "decoding PE Reference\n");
2679 if (nbchars != 0) break;
2680
2681 xmlParsePEReference(ctxt);
2682
2683 /*
2684 * Pop-up of finished entities.
2685 */
2686 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2687 xmlPopInput(ctxt);
2688
2689 break;
2690 } else {
2691 COPY_BUF(l,buffer,nbchars,c);
2692 NEXTL(l);
2693 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2694 growBuffer(buffer);
2695 }
2696 }
2697 c = CUR_CHAR(l);
2698 }
2699 buffer[nbchars++] = 0;
2700 return(buffer);
2701#endif
2702 return(NULL);
2703}
2704
2705/**
2706 * xmlNamespaceParseNCName:
2707 * @ctxt: an XML parser context
2708 *
2709 * parse an XML namespace name.
2710 *
2711 * TODO: this seems not in use anymore, the namespace handling is done on
2712 * top of the SAX interfaces, i.e. not on raw input.
2713 *
2714 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2715 *
2716 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2717 * CombiningChar | Extender
2718 *
2719 * Returns the namespace name or NULL
2720 */
2721
2722xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002723xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002724#if 0
2725 xmlChar buf[XML_MAX_NAMELEN + 5];
2726 int len = 0, l;
2727 int cur = CUR_CHAR(l);
2728#endif
2729
2730 static int deprecated = 0;
2731 if (!deprecated) {
2732 xmlGenericError(xmlGenericErrorContext,
2733 "xmlNamespaceParseNCName() deprecated function reached\n");
2734 deprecated = 1;
2735 }
2736
2737#if 0
2738 /* load first the value of the char !!! */
2739 GROW;
2740 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2741
2742xmlGenericError(xmlGenericErrorContext,
2743 "xmlNamespaceParseNCName: reached loop 3\n");
2744 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2745 (cur == '.') || (cur == '-') ||
2746 (cur == '_') ||
2747 (IS_COMBINING(cur)) ||
2748 (IS_EXTENDER(cur))) {
2749 COPY_BUF(l,buf,len,cur);
2750 NEXTL(l);
2751 cur = CUR_CHAR(l);
2752 if (len >= XML_MAX_NAMELEN) {
2753 xmlGenericError(xmlGenericErrorContext,
2754 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2755 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2756 (cur == '.') || (cur == '-') ||
2757 (cur == '_') ||
2758 (IS_COMBINING(cur)) ||
2759 (IS_EXTENDER(cur))) {
2760 NEXTL(l);
2761 cur = CUR_CHAR(l);
2762 }
2763 break;
2764 }
2765 }
2766 return(xmlStrndup(buf, len));
2767#endif
2768 return(NULL);
2769}
2770
2771/**
2772 * xmlNamespaceParseQName:
2773 * @ctxt: an XML parser context
2774 * @prefix: a xmlChar **
2775 *
2776 * TODO: this seems not in use anymore, the namespace handling is done on
2777 * top of the SAX interfaces, i.e. not on raw input.
2778 *
2779 * parse an XML qualified name
2780 *
2781 * [NS 5] QName ::= (Prefix ':')? LocalPart
2782 *
2783 * [NS 6] Prefix ::= NCName
2784 *
2785 * [NS 7] LocalPart ::= NCName
2786 *
2787 * Returns the local part, and prefix is updated
2788 * to get the Prefix if any.
2789 */
2790
2791xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002792xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002793
2794 static int deprecated = 0;
2795 if (!deprecated) {
2796 xmlGenericError(xmlGenericErrorContext,
2797 "xmlNamespaceParseQName() deprecated function reached\n");
2798 deprecated = 1;
2799 }
2800
2801#if 0
2802 xmlChar *ret = NULL;
2803
2804 *prefix = NULL;
2805 ret = xmlNamespaceParseNCName(ctxt);
2806 if (RAW == ':') {
2807 *prefix = ret;
2808 NEXT;
2809 ret = xmlNamespaceParseNCName(ctxt);
2810 }
2811
2812 return(ret);
2813#endif
2814 return(NULL);
2815}
2816
2817/**
2818 * xmlNamespaceParseNSDef:
2819 * @ctxt: an XML parser context
2820 *
2821 * parse a namespace prefix declaration
2822 *
2823 * TODO: this seems not in use anymore, the namespace handling is done on
2824 * top of the SAX interfaces, i.e. not on raw input.
2825 *
2826 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2827 *
2828 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2829 *
2830 * Returns the namespace name
2831 */
2832
2833xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002834xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002835 static int deprecated = 0;
2836 if (!deprecated) {
2837 xmlGenericError(xmlGenericErrorContext,
2838 "xmlNamespaceParseNSDef() deprecated function reached\n");
2839 deprecated = 1;
2840 }
2841 return(NULL);
2842#if 0
2843 xmlChar *name = NULL;
2844
2845 if ((RAW == 'x') && (NXT(1) == 'm') &&
2846 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2847 (NXT(4) == 's')) {
2848 SKIP(5);
2849 if (RAW == ':') {
2850 NEXT;
2851 name = xmlNamespaceParseNCName(ctxt);
2852 }
2853 }
2854 return(name);
2855#endif
2856}
2857
2858/**
2859 * xmlParseQuotedString:
2860 * @ctxt: an XML parser context
2861 *
2862 * Parse and return a string between quotes or doublequotes
2863 *
2864 * TODO: Deprecated, to be removed at next drop of binary compatibility
2865 *
2866 * Returns the string parser or NULL.
2867 */
2868xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002869xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002870 static int deprecated = 0;
2871 if (!deprecated) {
2872 xmlGenericError(xmlGenericErrorContext,
2873 "xmlParseQuotedString() deprecated function reached\n");
2874 deprecated = 1;
2875 }
2876 return(NULL);
2877
2878#if 0
2879 xmlChar *buf = NULL;
2880 int len = 0,l;
2881 int size = XML_PARSER_BUFFER_SIZE;
2882 int c;
2883
2884 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2885 if (buf == NULL) {
2886 xmlGenericError(xmlGenericErrorContext,
2887 "malloc of %d byte failed\n", size);
2888 return(NULL);
2889 }
2890xmlGenericError(xmlGenericErrorContext,
2891 "xmlParseQuotedString: reached loop 4\n");
2892 if (RAW == '"') {
2893 NEXT;
2894 c = CUR_CHAR(l);
2895 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2896 if (len + 5 >= size) {
2897 size *= 2;
2898 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2899 if (buf == NULL) {
2900 xmlGenericError(xmlGenericErrorContext,
2901 "realloc of %d byte failed\n", size);
2902 return(NULL);
2903 }
2904 }
2905 COPY_BUF(l,buf,len,c);
2906 NEXTL(l);
2907 c = CUR_CHAR(l);
2908 }
2909 if (c != '"') {
2910 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2912 ctxt->sax->error(ctxt->userData,
2913 "String not closed \"%.50s\"\n", buf);
2914 ctxt->wellFormed = 0;
2915 ctxt->disableSAX = 1;
2916 } else {
2917 NEXT;
2918 }
2919 } else if (RAW == '\''){
2920 NEXT;
2921 c = CUR;
2922 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2923 if (len + 1 >= size) {
2924 size *= 2;
2925 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2926 if (buf == NULL) {
2927 xmlGenericError(xmlGenericErrorContext,
2928 "realloc of %d byte failed\n", size);
2929 return(NULL);
2930 }
2931 }
2932 buf[len++] = c;
2933 NEXT;
2934 c = CUR;
2935 }
2936 if (RAW != '\'') {
2937 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2939 ctxt->sax->error(ctxt->userData,
2940 "String not closed \"%.50s\"\n", buf);
2941 ctxt->wellFormed = 0;
2942 ctxt->disableSAX = 1;
2943 } else {
2944 NEXT;
2945 }
2946 }
2947 return(buf);
2948#endif
2949}
2950
2951/**
2952 * xmlParseNamespace:
2953 * @ctxt: an XML parser context
2954 *
2955 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2956 *
2957 * This is what the older xml-name Working Draft specified, a bunch of
2958 * other stuff may still rely on it, so support is still here as
2959 * if it was declared on the root of the Tree:-(
2960 *
2961 * TODO: remove from library
2962 *
2963 * To be removed at next drop of binary compatibility
2964 */
2965
2966void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002967xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002968 static int deprecated = 0;
2969 if (!deprecated) {
2970 xmlGenericError(xmlGenericErrorContext,
2971 "xmlParseNamespace() deprecated function reached\n");
2972 deprecated = 1;
2973 }
2974
2975#if 0
2976 xmlChar *href = NULL;
2977 xmlChar *prefix = NULL;
2978 int garbage = 0;
2979
2980 /*
2981 * We just skipped "namespace" or "xml:namespace"
2982 */
2983 SKIP_BLANKS;
2984
2985xmlGenericError(xmlGenericErrorContext,
2986 "xmlParseNamespace: reached loop 5\n");
2987 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2988 /*
2989 * We can have "ns" or "prefix" attributes
2990 * Old encoding as 'href' or 'AS' attributes is still supported
2991 */
2992 if ((RAW == 'n') && (NXT(1) == 's')) {
2993 garbage = 0;
2994 SKIP(2);
2995 SKIP_BLANKS;
2996
2997 if (RAW != '=') continue;
2998 NEXT;
2999 SKIP_BLANKS;
3000
3001 href = xmlParseQuotedString(ctxt);
3002 SKIP_BLANKS;
3003 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3004 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3005 garbage = 0;
3006 SKIP(4);
3007 SKIP_BLANKS;
3008
3009 if (RAW != '=') continue;
3010 NEXT;
3011 SKIP_BLANKS;
3012
3013 href = xmlParseQuotedString(ctxt);
3014 SKIP_BLANKS;
3015 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3016 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3017 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3018 garbage = 0;
3019 SKIP(6);
3020 SKIP_BLANKS;
3021
3022 if (RAW != '=') continue;
3023 NEXT;
3024 SKIP_BLANKS;
3025
3026 prefix = xmlParseQuotedString(ctxt);
3027 SKIP_BLANKS;
3028 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3029 garbage = 0;
3030 SKIP(2);
3031 SKIP_BLANKS;
3032
3033 if (RAW != '=') continue;
3034 NEXT;
3035 SKIP_BLANKS;
3036
3037 prefix = xmlParseQuotedString(ctxt);
3038 SKIP_BLANKS;
3039 } else if ((RAW == '?') && (NXT(1) == '>')) {
3040 garbage = 0;
3041 NEXT;
3042 } else {
3043 /*
3044 * Found garbage when parsing the namespace
3045 */
3046 if (!garbage) {
3047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3048 ctxt->sax->error(ctxt->userData,
3049 "xmlParseNamespace found garbage\n");
3050 }
3051 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3052 ctxt->wellFormed = 0;
3053 ctxt->disableSAX = 1;
3054 NEXT;
3055 }
3056 }
3057
3058 MOVETO_ENDTAG(CUR_PTR);
3059 NEXT;
3060
3061 /*
3062 * Register the DTD.
3063 if (href != NULL)
3064 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3065 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3066 */
3067
3068 if (prefix != NULL) xmlFree(prefix);
3069 if (href != NULL) xmlFree(href);
3070#endif
3071}
3072
3073/**
3074 * xmlScanName:
3075 * @ctxt: an XML parser context
3076 *
3077 * Trickery: parse an XML name but without consuming the input flow
3078 * Needed for rollback cases. Used only when parsing entities references.
3079 *
3080 * TODO: seems deprecated now, only used in the default part of
3081 * xmlParserHandleReference
3082 *
3083 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3084 * CombiningChar | Extender
3085 *
3086 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3087 *
3088 * [6] Names ::= Name (S Name)*
3089 *
3090 * Returns the Name parsed or NULL
3091 */
3092
3093xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003094xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003095 static int deprecated = 0;
3096 if (!deprecated) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "xmlScanName() deprecated function reached\n");
3099 deprecated = 1;
3100 }
3101 return(NULL);
3102
3103#if 0
3104 xmlChar buf[XML_MAX_NAMELEN];
3105 int len = 0;
3106
3107 GROW;
3108 if (!IS_LETTER(RAW) && (RAW != '_') &&
3109 (RAW != ':')) {
3110 return(NULL);
3111 }
3112
3113
3114 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3115 (NXT(len) == '.') || (NXT(len) == '-') ||
3116 (NXT(len) == '_') || (NXT(len) == ':') ||
3117 (IS_COMBINING(NXT(len))) ||
3118 (IS_EXTENDER(NXT(len)))) {
3119 GROW;
3120 buf[len] = NXT(len);
3121 len++;
3122 if (len >= XML_MAX_NAMELEN) {
3123 xmlGenericError(xmlGenericErrorContext,
3124 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3125 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3126 (IS_DIGIT(NXT(len))) ||
3127 (NXT(len) == '.') || (NXT(len) == '-') ||
3128 (NXT(len) == '_') || (NXT(len) == ':') ||
3129 (IS_COMBINING(NXT(len))) ||
3130 (IS_EXTENDER(NXT(len))))
3131 len++;
3132 break;
3133 }
3134 }
3135 return(xmlStrndup(buf, len));
3136#endif
3137}
3138
3139/**
3140 * xmlParserHandleReference:
3141 * @ctxt: the parser context
3142 *
3143 * TODO: Remove, now deprecated ... the test is done directly in the
3144 * content parsing
3145 * routines.
3146 *
3147 * [67] Reference ::= EntityRef | CharRef
3148 *
3149 * [68] EntityRef ::= '&' Name ';'
3150 *
3151 * [ WFC: Entity Declared ]
3152 * the Name given in the entity reference must match that in an entity
3153 * declaration, except that well-formed documents need not declare any
3154 * of the following entities: amp, lt, gt, apos, quot.
3155 *
3156 * [ WFC: Parsed Entity ]
3157 * An entity reference must not contain the name of an unparsed entity
3158 *
3159 * [66] CharRef ::= '&#' [0-9]+ ';' |
3160 * '&#x' [0-9a-fA-F]+ ';'
3161 *
3162 * A PEReference may have been detectect in the current input stream
3163 * the handling is done accordingly to
3164 * http://www.w3.org/TR/REC-xml#entproc
3165 */
3166void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003167xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003168 static int deprecated = 0;
3169 if (!deprecated) {
3170 xmlGenericError(xmlGenericErrorContext,
3171 "xmlParserHandleReference() deprecated function reached\n");
3172 deprecated = 1;
3173 }
3174
3175#if 0
3176 xmlParserInputPtr input;
3177 xmlChar *name;
3178 xmlEntityPtr ent = NULL;
3179
3180 if (ctxt->token != 0) {
3181 return;
3182 }
3183 if (RAW != '&') return;
3184 GROW;
3185 if ((RAW == '&') && (NXT(1) == '#')) {
3186 switch(ctxt->instate) {
3187 case XML_PARSER_ENTITY_DECL:
3188 case XML_PARSER_PI:
3189 case XML_PARSER_CDATA_SECTION:
3190 case XML_PARSER_COMMENT:
3191 case XML_PARSER_SYSTEM_LITERAL:
3192 /* we just ignore it there */
3193 return;
3194 case XML_PARSER_START_TAG:
3195 return;
3196 case XML_PARSER_END_TAG:
3197 return;
3198 case XML_PARSER_EOF:
3199 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3201 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3202 ctxt->wellFormed = 0;
3203 ctxt->disableSAX = 1;
3204 return;
3205 case XML_PARSER_PROLOG:
3206 case XML_PARSER_START:
3207 case XML_PARSER_MISC:
3208 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3210 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3211 ctxt->wellFormed = 0;
3212 ctxt->disableSAX = 1;
3213 return;
3214 case XML_PARSER_EPILOG:
3215 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3217 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3218 ctxt->wellFormed = 0;
3219 ctxt->disableSAX = 1;
3220 return;
3221 case XML_PARSER_DTD:
3222 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3224 ctxt->sax->error(ctxt->userData,
3225 "CharRef are forbiden in DTDs!\n");
3226 ctxt->wellFormed = 0;
3227 ctxt->disableSAX = 1;
3228 return;
3229 case XML_PARSER_ENTITY_VALUE:
3230 /*
3231 * NOTE: in the case of entity values, we don't do the
3232 * substitution here since we need the literal
3233 * entity value to be able to save the internal
3234 * subset of the document.
3235 * This will be handled by xmlStringDecodeEntities
3236 */
3237 return;
3238 case XML_PARSER_CONTENT:
3239 return;
3240 case XML_PARSER_ATTRIBUTE_VALUE:
3241 /* ctxt->token = xmlParseCharRef(ctxt); */
3242 return;
3243 case XML_PARSER_IGNORE:
3244 return;
3245 }
3246 return;
3247 }
3248
3249 switch(ctxt->instate) {
3250 case XML_PARSER_CDATA_SECTION:
3251 return;
3252 case XML_PARSER_PI:
3253 case XML_PARSER_COMMENT:
3254 case XML_PARSER_SYSTEM_LITERAL:
3255 case XML_PARSER_CONTENT:
3256 return;
3257 case XML_PARSER_START_TAG:
3258 return;
3259 case XML_PARSER_END_TAG:
3260 return;
3261 case XML_PARSER_EOF:
3262 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3264 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3265 ctxt->wellFormed = 0;
3266 ctxt->disableSAX = 1;
3267 return;
3268 case XML_PARSER_PROLOG:
3269 case XML_PARSER_START:
3270 case XML_PARSER_MISC:
3271 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3273 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3274 ctxt->wellFormed = 0;
3275 ctxt->disableSAX = 1;
3276 return;
3277 case XML_PARSER_EPILOG:
3278 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3280 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3281 ctxt->wellFormed = 0;
3282 ctxt->disableSAX = 1;
3283 return;
3284 case XML_PARSER_ENTITY_VALUE:
3285 /*
3286 * NOTE: in the case of entity values, we don't do the
3287 * substitution here since we need the literal
3288 * entity value to be able to save the internal
3289 * subset of the document.
3290 * This will be handled by xmlStringDecodeEntities
3291 */
3292 return;
3293 case XML_PARSER_ATTRIBUTE_VALUE:
3294 /*
3295 * NOTE: in the case of attributes values, we don't do the
3296 * substitution here unless we are in a mode where
3297 * the parser is explicitely asked to substitute
3298 * entities. The SAX callback is called with values
3299 * without entity substitution.
3300 * This will then be handled by xmlStringDecodeEntities
3301 */
3302 return;
3303 case XML_PARSER_ENTITY_DECL:
3304 /*
3305 * we just ignore it there
3306 * the substitution will be done once the entity is referenced
3307 */
3308 return;
3309 case XML_PARSER_DTD:
3310 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3312 ctxt->sax->error(ctxt->userData,
3313 "Entity references are forbiden in DTDs!\n");
3314 ctxt->wellFormed = 0;
3315 ctxt->disableSAX = 1;
3316 return;
3317 case XML_PARSER_IGNORE:
3318 return;
3319 }
3320
3321/* TODO: this seems not reached anymore .... Verify ... */
3322xmlGenericError(xmlGenericErrorContext,
3323 "Reached deprecated section in xmlParserHandleReference()\n");
3324xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003325 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003326xmlGenericError(xmlGenericErrorContext,
3327 "indicating the version: %s, thanks !\n", xmlParserVersion);
3328 NEXT;
3329 name = xmlScanName(ctxt);
3330 if (name == NULL) {
3331 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3334 ctxt->wellFormed = 0;
3335 ctxt->disableSAX = 1;
3336 ctxt->token = '&';
3337 return;
3338 }
3339 if (NXT(xmlStrlen(name)) != ';') {
3340 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3342 ctxt->sax->error(ctxt->userData,
3343 "Entity reference: ';' expected\n");
3344 ctxt->wellFormed = 0;
3345 ctxt->disableSAX = 1;
3346 ctxt->token = '&';
3347 xmlFree(name);
3348 return;
3349 }
3350 SKIP(xmlStrlen(name) + 1);
3351 if (ctxt->sax != NULL) {
3352 if (ctxt->sax->getEntity != NULL)
3353 ent = ctxt->sax->getEntity(ctxt->userData, name);
3354 }
3355
3356 /*
3357 * [ WFC: Entity Declared ]
3358 * the Name given in the entity reference must match that in an entity
3359 * declaration, except that well-formed documents need not declare any
3360 * of the following entities: amp, lt, gt, apos, quot.
3361 */
3362 if (ent == NULL)
3363 ent = xmlGetPredefinedEntity(name);
3364 if (ent == NULL) {
3365 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3367 ctxt->sax->error(ctxt->userData,
3368 "Entity reference: entity %s not declared\n",
3369 name);
3370 ctxt->wellFormed = 0;
3371 ctxt->disableSAX = 1;
3372 xmlFree(name);
3373 return;
3374 }
3375
3376 /*
3377 * [ WFC: Parsed Entity ]
3378 * An entity reference must not contain the name of an unparsed entity
3379 */
3380 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3381 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Entity reference to unparsed entity %s\n", name);
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388
3389 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3390 ctxt->token = ent->content[0];
3391 xmlFree(name);
3392 return;
3393 }
3394 input = xmlNewEntityInputStream(ctxt, ent);
3395 xmlPushInput(ctxt, input);
3396 xmlFree(name);
3397#endif
3398 return;
3399}
3400
3401/**
3402 * xmlHandleEntity:
3403 * @ctxt: an XML parser context
3404 * @entity: an XML entity pointer.
3405 *
3406 * Default handling of defined entities, when should we define a new input
3407 * stream ? When do we just handle that as a set of chars ?
3408 *
3409 * OBSOLETE: to be removed at some point.
3410 */
3411
3412void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003413xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003414 static int deprecated = 0;
3415 if (!deprecated) {
3416 xmlGenericError(xmlGenericErrorContext,
3417 "xmlHandleEntity() deprecated function reached\n");
3418 deprecated = 1;
3419 }
3420
3421#if 0
3422 int len;
3423 xmlParserInputPtr input;
3424
3425 if (entity->content == NULL) {
3426 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3428 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3429 entity->name);
3430 ctxt->wellFormed = 0;
3431 ctxt->disableSAX = 1;
3432 return;
3433 }
3434 len = xmlStrlen(entity->content);
3435 if (len <= 2) goto handle_as_char;
3436
3437 /*
3438 * Redefine its content as an input stream.
3439 */
3440 input = xmlNewEntityInputStream(ctxt, entity);
3441 xmlPushInput(ctxt, input);
3442 return;
3443
3444handle_as_char:
3445 /*
3446 * Just handle the content as a set of chars.
3447 */
3448 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3449 (ctxt->sax->characters != NULL))
3450 ctxt->sax->characters(ctxt->userData, entity->content, len);
3451#endif
3452}
3453
3454/**
3455 * xmlNewGlobalNs:
3456 * @doc: the document carrying the namespace
3457 * @href: the URI associated
3458 * @prefix: the prefix for the namespace
3459 *
3460 * Creation of a Namespace, the old way using PI and without scoping
3461 * DEPRECATED !!!
3462 * It now create a namespace on the root element of the document if found.
3463 * Returns NULL this functionnality had been removed
3464 */
3465xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003466xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3467 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003468 static int deprecated = 0;
3469 if (!deprecated) {
3470 xmlGenericError(xmlGenericErrorContext,
3471 "xmlNewGlobalNs() deprecated function reached\n");
3472 deprecated = 1;
3473 }
3474 return(NULL);
3475#if 0
3476 xmlNodePtr root;
3477
3478 xmlNsPtr cur;
3479
3480 root = xmlDocGetRootElement(doc);
3481 if (root != NULL)
3482 return(xmlNewNs(root, href, prefix));
3483
3484 /*
3485 * if there is no root element yet, create an old Namespace type
3486 * and it will be moved to the root at save time.
3487 */
3488 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3489 if (cur == NULL) {
3490 xmlGenericError(xmlGenericErrorContext,
3491 "xmlNewGlobalNs : malloc failed\n");
3492 return(NULL);
3493 }
3494 memset(cur, 0, sizeof(xmlNs));
3495 cur->type = XML_GLOBAL_NAMESPACE;
3496
3497 if (href != NULL)
3498 cur->href = xmlStrdup(href);
3499 if (prefix != NULL)
3500 cur->prefix = xmlStrdup(prefix);
3501
3502 /*
3503 * Add it at the end to preserve parsing order ...
3504 */
3505 if (doc != NULL) {
3506 if (doc->oldNs == NULL) {
3507 doc->oldNs = cur;
3508 } else {
3509 xmlNsPtr prev = doc->oldNs;
3510
3511 while (prev->next != NULL) prev = prev->next;
3512 prev->next = cur;
3513 }
3514 }
3515
3516 return(NULL);
3517#endif
3518}
3519
3520/**
3521 * xmlUpgradeOldNs:
3522 * @doc: a document pointer
3523 *
3524 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3525 * DEPRECATED
3526 */
3527void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003528xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003529 static int deprecated = 0;
3530 if (!deprecated) {
3531 xmlGenericError(xmlGenericErrorContext,
3532 "xmlNewGlobalNs() deprecated function reached\n");
3533 deprecated = 1;
3534 }
3535#if 0
3536 xmlNsPtr cur;
3537
3538 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3539 if (doc->children == NULL) {
3540#ifdef DEBUG_TREE
3541 xmlGenericError(xmlGenericErrorContext,
3542 "xmlUpgradeOldNs: failed no root !\n");
3543#endif
3544 return;
3545 }
3546
3547 cur = doc->oldNs;
3548 while (cur->next != NULL) {
3549 cur->type = XML_LOCAL_NAMESPACE;
3550 cur = cur->next;
3551 }
3552 cur->type = XML_LOCAL_NAMESPACE;
3553 cur->next = doc->children->nsDef;
3554 doc->children->nsDef = doc->oldNs;
3555 doc->oldNs = NULL;
3556#endif
3557}
3558