blob: 84d6eef8edb409382435f605622bc52b183b5c2a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
Daniel Veillarda53c6882001-07-25 17:18:57 +000058/*
59 * Various global defaults for parsing
60 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Names of the features which can be listed with xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the total
 * number of features; *@len is lowered to that total when it exceeds it,
 * and the copied strings must not be deallocated
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total = (int) (sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]));
    int wanted, idx;

    /* without both arguments this is only a query for the count */
    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        wanted = total;
        *len = wanted;
    }
    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
373/************************************************************************
374 * *
375 * Some functions to avoid too large macros *
376 * *
377 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fail the range test below as well */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
399
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
414
/*
 * Direct lookup table giving the BaseChar status of the code points
 * 0x00 to 0xFF.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive {first, last} ranges of the BaseChar production for the code
 * points from 0x0100 upward; single characters are stored as {x, x}.
 * The entries MUST stay sorted in increasing order and must not overlap,
 * xmlIsBaseChar() runs a binary search over them.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, the others by
 * a binary search in the sorted xmlBaseCharRanges table. Negative values
 * are rejected up front so they can never be used as an array index.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) / sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
653
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive {first, last} ranges of the non-ASCII digits */
    static const int digitRanges[][2] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
        {0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF},
        {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
        {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int nbRanges = (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    /* ASCII digits are tested first */
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    /* nothing else qualifies below the first table entry */
    if (c < 0x0660)
        return(0);

    for (idx = 0; idx < nbRanges; idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
683
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * Inclusive {first, last} ranges listed in the order of the
     * production; single characters are stored as {x, x}.
     */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
        {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
        {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3}, {0x0A02, 0x0A02},
        {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
        {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03},
        {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48},
        {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
        {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1},
        {0x302A, 0x302F}, {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    const int nbRanges =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    int idx;

    /* no combining character exists below the first table entry */
    if (c < 0x0300)
        return(0);

    for (idx = 0; idx < nbRanges; idx++) {
        if ((c >= combiningRanges[idx][0]) && (c <= combiningRanges[idx][1]))
            return(1);
    }
    return(0);
}
796
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Tell whether @c belongs to the XML character class
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if (((c >= 0x3031) && (c <= 0x3035)) ||
        ((c >= 0x309D) && (c <= 0x309E)) ||
        ((c >= 0x30FC) && (c <= 0x30FE)))
        return(1);

    /* the isolated code points of the production */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
        (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
        (c == 0x0EC6) || (c == 0x3005))
        return(1);

    return(0);
}
821
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; in
 * particular the range [#xF900-#xFA2D] is not part of it and is
 * therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast rejection: nothing below #x3007 is an Ideographic */
    if (c < 0x3007)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
839
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Tell whether @c is a Letter as defined by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first, Ideographic only if that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Tell whether @c may appear in a public identifier, i.e. whether it
 * matches the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')))
        return(1);

    switch (c) {
        /* the three allowed white space characters */
        case 0x20: case 0x0D: case 0x0A:
        /* the allowed punctuation */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to check
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError when the cached pointers of @in
 * (base, cur) are inconsistent with its underlying buffer, then dumps
 * the current state of the input.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (casting them to int truncates them
     * where pointers are wider than int) and every integer argument is
     * explicitly converted to the type its conversion specifier expects.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
916
917
918/**
919 * xmlParserInputRead:
920 * @in: an XML parser input
921 * @len: an indicative size for the lookahead
922 *
923 * This function refresh the input for the parser. It doesn't try to
924 * preserve pointers to the input buffer, and discard already read data
925 *
926 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
927 * end of this entity
928 */
929int
930xmlParserInputRead(xmlParserInputPtr in, int len) {
931 int ret;
932 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000933 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000934
935#ifdef DEBUG_INPUT
936 xmlGenericError(xmlGenericErrorContext, "Read\n");
937#endif
938 if (in->buf == NULL) return(-1);
939 if (in->base == NULL) return(-1);
940 if (in->cur == NULL) return(-1);
941 if (in->buf->buffer == NULL) return(-1);
942 if (in->buf->readcallback == NULL) return(-1);
943
944 CHECK_BUFFER(in);
945
946 used = in->cur - in->buf->buffer->content;
947 ret = xmlBufferShrink(in->buf->buffer, used);
948 if (ret > 0) {
949 in->cur -= ret;
950 in->consumed += ret;
951 }
952 ret = xmlParserInputBufferRead(in->buf, len);
953 if (in->base != in->buf->buffer->content) {
954 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000955 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000956 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000957 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000958 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000959 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000960 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000961 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000962
963 CHECK_BUFFER(in);
964
965 return(ret);
966}
967
968/**
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
972 *
973 * This function increase the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keep already read data
975 *
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
977 * end of this entity
978 */
979int
980xmlParserInputGrow(xmlParserInputPtr in, int len) {
981 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000982 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000983
984#ifdef DEBUG_INPUT
985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
986#endif
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
991
992 CHECK_BUFFER(in);
993
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000994 indx = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000996
997 CHECK_BUFFER(in);
998
999 return(0);
1000 }
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1003 else
1004 return(0);
1005
1006 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001007 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001008 * block, but we use it really as an integer to do some
1009 * pointer arithmetic. Insure will raise it as a bug but in
1010 * that specific case, that's not !
1011 */
1012 if (in->base != in->buf->buffer->content) {
1013 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001015 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001016 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001017 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001018 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001019 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001020 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001021
1022 CHECK_BUFFER(in);
1023
1024 return(ret);
1025}
1026
1027/**
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1030 *
1031 * This function removes used input for the parser.
1032 */
1033void
1034xmlParserInputShrink(xmlParserInputPtr in) {
1035 int used;
1036 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001037 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001038
1039#ifdef DEBUG_INPUT
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1041#endif
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1046
1047 CHECK_BUFFER(in);
1048
1049 used = in->cur - in->buf->buffer->content;
1050 /*
1051 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001052 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001053 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001054#if 0
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001055 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001056 return;
Daniel Veillarda880b122003-04-21 21:36:41 +00001057#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001058 if (used > INPUT_CHUNK) {
1059 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1060 if (ret > 0) {
1061 in->cur -= ret;
1062 in->consumed += ret;
1063 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001064 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001065 }
1066
1067 CHECK_BUFFER(in);
1068
1069 if (in->buf->buffer->use > INPUT_CHUNK) {
1070 return;
1071 }
1072 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1073 if (in->base != in->buf->buffer->content) {
1074 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001075 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001076 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001077 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001078 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001080 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001081 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001082
1083 CHECK_BUFFER(in);
1084}
1085
1086/************************************************************************
1087 * *
1088 * UTF8 character input and related functions *
1089 * *
1090 ************************************************************************/
1091
1092/**
1093 * xmlNextChar:
1094 * @ctxt: the XML parser context
1095 *
1096 * Skip to the next char input char.
1097 */
1098
1099void
Daniel Veillard77a90a72003-03-22 00:04:05 +00001100xmlNextChar(xmlParserCtxtPtr ctxt)
1101{
Owen Taylor3473f882001-02-23 17:55:21 +00001102 if (ctxt->instate == XML_PARSER_EOF)
Daniel Veillard77a90a72003-03-22 00:04:05 +00001103 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001104
Daniel Veillardfdc91562002-07-01 21:52:03 +00001105 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001106 if ((*ctxt->input->cur == 0) &&
1107 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1108 (ctxt->instate != XML_PARSER_COMMENT)) {
1109 /*
1110 * If we are at the end of the current entity and
1111 * the context allows it, we pop consumed entities
1112 * automatically.
1113 * the auto closing should be blocked in other cases
1114 */
1115 xmlPopInput(ctxt);
1116 } else {
1117 const unsigned char *cur;
1118 unsigned char c;
Owen Taylor3473f882001-02-23 17:55:21 +00001119
Daniel Veillard77a90a72003-03-22 00:04:05 +00001120 /*
1121 * 2.11 End-of-Line Handling
1122 * the literal two-character sequence "#xD#xA" or a standalone
1123 * literal #xD, an XML processor must pass to the application
1124 * the single character #xA.
1125 */
1126 if (*(ctxt->input->cur) == '\n') {
1127 ctxt->input->line++;
1128 ctxt->input->col = 1;
1129 } else
1130 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001131
Daniel Veillard77a90a72003-03-22 00:04:05 +00001132 /*
1133 * We are supposed to handle UTF8, check it's valid
1134 * From rfc2044: encoding of the Unicode values on UTF-8:
1135 *
1136 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1137 * 0000 0000-0000 007F 0xxxxxxx
1138 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1139 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1140 *
1141 * Check for the 0x110000 limit too
1142 */
1143 cur = ctxt->input->cur;
1144
1145 c = *cur;
1146 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001147 if (c == 0xC0)
1148 goto encoding_error;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001149 if (cur[1] == 0)
1150 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1151 if ((cur[1] & 0xc0) != 0x80)
1152 goto encoding_error;
1153 if ((c & 0xe0) == 0xe0) {
1154 unsigned int val;
1155
1156 if (cur[2] == 0)
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if ((cur[2] & 0xc0) != 0x80)
1159 goto encoding_error;
1160 if ((c & 0xf0) == 0xf0) {
1161 if (cur[3] == 0)
1162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1163 if (((c & 0xf8) != 0xf0) ||
1164 ((cur[3] & 0xc0) != 0x80))
1165 goto encoding_error;
1166 /* 4-byte code */
1167 ctxt->input->cur += 4;
1168 val = (cur[0] & 0x7) << 18;
1169 val |= (cur[1] & 0x3f) << 12;
1170 val |= (cur[2] & 0x3f) << 6;
1171 val |= cur[3] & 0x3f;
1172 } else {
1173 /* 3-byte code */
1174 ctxt->input->cur += 3;
1175 val = (cur[0] & 0xf) << 12;
1176 val |= (cur[1] & 0x3f) << 6;
1177 val |= cur[2] & 0x3f;
1178 }
1179 if (((val > 0xd7ff) && (val < 0xe000)) ||
1180 ((val > 0xfffd) && (val < 0x10000)) ||
1181 (val >= 0x110000)) {
1182 if ((ctxt->sax != NULL) &&
1183 (ctxt->sax->error != NULL))
1184 ctxt->sax->error(ctxt->userData,
1185 "Char 0x%X out of allowed range\n",
1186 val);
1187 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1188 ctxt->wellFormed = 0;
1189 if (ctxt->recovery == 0)
1190 ctxt->disableSAX = 1;
1191 }
1192 } else
1193 /* 2-byte code */
1194 ctxt->input->cur += 2;
1195 } else
1196 /* 1-byte code */
1197 ctxt->input->cur++;
1198
1199 ctxt->nbChars++;
1200 if (*ctxt->input->cur == 0)
1201 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1202 }
Owen Taylor3473f882001-02-23 17:55:21 +00001203 } else {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001204 /*
1205 * Assume it's a fixed length encoding (1) with
1206 * a compatible encoding for the ASCII set, since
1207 * XML constructs only use < 128 chars
1208 */
1209
1210 if (*(ctxt->input->cur) == '\n') {
1211 ctxt->input->line++;
1212 ctxt->input->col = 1;
1213 } else
1214 ctxt->input->col++;
1215 ctxt->input->cur++;
1216 ctxt->nbChars++;
1217 if (*ctxt->input->cur == 0)
1218 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001219 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001220 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001221 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001222 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001223 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001224 xmlPopInput(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001225 return;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001226 encoding_error:
Owen Taylor3473f882001-02-23 17:55:21 +00001227 /*
1228 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001229 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001230 * declaration header. Report the error and switch the encoding
1231 * to ISO-Latin-1 (if you don't like this policy, just declare the
1232 * encoding !)
1233 */
1234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001235 ctxt->sax->error(ctxt->userData,
1236 "Input is not proper UTF-8, indicate encoding !\n");
1237 ctxt->sax->error(ctxt->userData,
1238 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1239 ctxt->input->cur[0], ctxt->input->cur[1],
1240 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001241 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001242 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001243 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1244
Daniel Veillard77a90a72003-03-22 00:04:05 +00001245 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001246 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001247 return;
1248}
1249
1250/**
1251 * xmlCurrentChar:
1252 * @ctxt: the XML parser context
1253 * @len: pointer to the length of the char read
1254 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001255 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001256 * bytes in the input buffer. Implement the end of line normalization:
1257 * 2.11 End-of-Line Handling
1258 * Wherever an external parsed entity or the literal entity value
1259 * of an internal parsed entity contains either the literal two-character
1260 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1261 * must pass to the application the single character #xA.
1262 * This behavior can conveniently be produced by normalizing all
1263 * line breaks to #xA on input, before parsing.)
1264 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001265 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001266 */
1267
1268int
1269xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1270 if (ctxt->instate == XML_PARSER_EOF)
1271 return(0);
1272
Daniel Veillard561b7f82002-03-20 21:55:57 +00001273 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1274 *len = 1;
1275 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001276 }
1277 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1278 /*
1279 * We are supposed to handle UTF8, check it's valid
1280 * From rfc2044: encoding of the Unicode values on UTF-8:
1281 *
1282 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1283 * 0000 0000-0000 007F 0xxxxxxx
1284 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1285 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1286 *
1287 * Check for the 0x110000 limit too
1288 */
1289 const unsigned char *cur = ctxt->input->cur;
1290 unsigned char c;
1291 unsigned int val;
1292
1293 c = *cur;
1294 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001295 if (c == 0xC0)
1296 goto encoding_error;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001297 if (cur[1] == 0)
1298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1299 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001300 goto encoding_error;
1301 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001302
1303 if (cur[2] == 0)
1304 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1305 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001306 goto encoding_error;
1307 if ((c & 0xf0) == 0xf0) {
1308 if (cur[3] == 0)
1309 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001310 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001311 ((cur[3] & 0xc0) != 0x80))
1312 goto encoding_error;
1313 /* 4-byte code */
1314 *len = 4;
1315 val = (cur[0] & 0x7) << 18;
1316 val |= (cur[1] & 0x3f) << 12;
1317 val |= (cur[2] & 0x3f) << 6;
1318 val |= cur[3] & 0x3f;
1319 } else {
1320 /* 3-byte code */
1321 *len = 3;
1322 val = (cur[0] & 0xf) << 12;
1323 val |= (cur[1] & 0x3f) << 6;
1324 val |= cur[2] & 0x3f;
1325 }
1326 } else {
1327 /* 2-byte code */
1328 *len = 2;
1329 val = (cur[0] & 0x1f) << 6;
1330 val |= cur[1] & 0x3f;
1331 }
1332 if (!IS_CHAR(val)) {
1333 if ((ctxt->sax != NULL) &&
1334 (ctxt->sax->error != NULL))
1335 ctxt->sax->error(ctxt->userData,
1336 "Char 0x%X out of allowed range\n", val);
1337 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1338 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001339 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001340 }
1341 return(val);
1342 } else {
1343 /* 1-byte code */
1344 *len = 1;
1345 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001346 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001347 ctxt->nbChars++;
1348 ctxt->input->cur++;
1349 }
1350 return(0xA);
1351 }
1352 return((int) *ctxt->input->cur);
1353 }
1354 }
1355 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001356 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001357 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001358 * XML constructs only use < 128 chars
1359 */
1360 *len = 1;
1361 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001362 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001363 ctxt->nbChars++;
1364 ctxt->input->cur++;
1365 }
1366 return(0xA);
1367 }
1368 return((int) *ctxt->input->cur);
1369encoding_error:
1370 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001371 * An encoding problem may arise from a truncated input buffer
1372 * splitting a character in the middle. In that case do not raise
1373 * an error but return 0 to endicate an end of stream problem
1374 */
1375 if (ctxt->input->end - ctxt->input->cur < 4) {
1376 *len = 0;
1377 return(0);
1378 }
1379
1380 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001381 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001382 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001383 * declaration header. Report the error and switch the encoding
1384 * to ISO-Latin-1 (if you don't like this policy, just declare the
1385 * encoding !)
1386 */
1387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1388 ctxt->sax->error(ctxt->userData,
1389 "Input is not proper UTF-8, indicate encoding !\n");
1390 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001391 ctxt->input->cur[0], ctxt->input->cur[1],
1392 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001393 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001394 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001395 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1396
1397 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1398 *len = 1;
1399 return((int) *ctxt->input->cur);
1400}
1401
1402/**
1403 * xmlStringCurrentChar:
1404 * @ctxt: the XML parser context
1405 * @cur: pointer to the beginning of the char
1406 * @len: pointer to the length of the char read
1407 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001408 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001409 * bytes in the input buffer.
1410 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001411 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001412 */
1413
1414int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001415xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1416{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001417 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001418 /*
1419 * We are supposed to handle UTF8, check it's valid
1420 * From rfc2044: encoding of the Unicode values on UTF-8:
1421 *
1422 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1423 * 0000 0000-0000 007F 0xxxxxxx
1424 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1425 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1426 *
1427 * Check for the 0x110000 limit too
1428 */
1429 unsigned char c;
1430 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001431
Daniel Veillardd8224e02002-01-13 15:43:22 +00001432 c = *cur;
1433 if (c & 0x80) {
1434 if ((cur[1] & 0xc0) != 0x80)
1435 goto encoding_error;
1436 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001437
Daniel Veillardd8224e02002-01-13 15:43:22 +00001438 if ((cur[2] & 0xc0) != 0x80)
1439 goto encoding_error;
1440 if ((c & 0xf0) == 0xf0) {
1441 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1442 goto encoding_error;
1443 /* 4-byte code */
1444 *len = 4;
1445 val = (cur[0] & 0x7) << 18;
1446 val |= (cur[1] & 0x3f) << 12;
1447 val |= (cur[2] & 0x3f) << 6;
1448 val |= cur[3] & 0x3f;
1449 } else {
1450 /* 3-byte code */
1451 *len = 3;
1452 val = (cur[0] & 0xf) << 12;
1453 val |= (cur[1] & 0x3f) << 6;
1454 val |= cur[2] & 0x3f;
1455 }
1456 } else {
1457 /* 2-byte code */
1458 *len = 2;
1459 val = (cur[0] & 0x1f) << 6;
1460 val |= cur[1] & 0x3f;
1461 }
1462 if (!IS_CHAR(val)) {
1463 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1464 (ctxt->sax->error != NULL))
1465 ctxt->sax->error(ctxt->userData,
1466 "Char 0x%X out of allowed range\n",
1467 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001468 if (ctxt != NULL) {
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470 ctxt->wellFormed = 0;
1471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1472 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001473 }
1474 return (val);
1475 } else {
1476 /* 1-byte code */
1477 *len = 1;
1478 return ((int) *cur);
1479 }
Owen Taylor3473f882001-02-23 17:55:21 +00001480 }
1481 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001482 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001483 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * XML constructs only use < 128 chars
1485 */
1486 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001487 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001488encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001489
Owen Taylor3473f882001-02-23 17:55:21 +00001490 /*
1491 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001493 * declaration header. Report the error and switch the encoding
1494 * to ISO-Latin-1 (if you don't like this policy, just declare the
1495 * encoding !)
1496 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001497 if (ctxt != NULL) {
1498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1499 ctxt->sax->error(ctxt->userData,
1500 "Input is not proper UTF-8, indicate encoding !\n");
1501 ctxt->sax->error(ctxt->userData,
1502 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1503 ctxt->input->cur[0], ctxt->input->cur[1],
1504 ctxt->input->cur[2], ctxt->input->cur[3]);
1505 }
1506 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001507 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001508 }
Owen Taylor3473f882001-02-23 17:55:21 +00001509
1510 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001511 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001512}
1513
1514/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001515 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001516 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001517 * @val: the char value
1518 *
1519 * append the char value in the array
1520 *
1521 * Returns the number of xmlChar written
1522 */
Owen Taylor3473f882001-02-23 17:55:21 +00001523int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001524xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001525 /*
1526 * We are supposed to handle UTF8, check it's valid
1527 * From rfc2044: encoding of the Unicode values on UTF-8:
1528 *
1529 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1530 * 0000 0000-0000 007F 0xxxxxxx
1531 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1532 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1533 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001534 if (val >= 0x80) {
1535 xmlChar *savedout = out;
1536 int bits;
1537 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1538 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1539 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1540 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001541 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001542 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001543 val);
1544 return(0);
1545 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001546 for ( ; bits >= 0; bits-= 6)
1547 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1548 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001549 }
1550 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001551 return 1;
1552}
1553
1554/**
1555 * xmlCopyChar:
1556 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001557 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001558 * @val: the char value
1559 *
1560 * append the char value in the array
1561 *
1562 * Returns the number of xmlChar written
1563 */
1564
1565int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001566xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001567 /* the len parameter is ignored */
1568 if (val >= 0x80) {
1569 return(xmlCopyCharMultiByte (out, val));
1570 }
1571 *out = (xmlChar) val;
1572 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001573}
1574
1575/************************************************************************
1576 * *
1577 * Commodity functions to switch encodings *
1578 * *
1579 ************************************************************************/
1580
1581/**
1582 * xmlSwitchEncoding:
1583 * @ctxt: the parser context
1584 * @enc: the encoding value (number)
1585 *
1586 * change the input functions when discovering the character encoding
1587 * of a given entity.
1588 *
1589 * Returns 0 in case of success, -1 otherwise
1590 */
1591int
1592xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1593{
1594 xmlCharEncodingHandlerPtr handler;
1595
1596 switch (enc) {
1597 case XML_CHAR_ENCODING_ERROR:
1598 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1600 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001603 break;
1604 case XML_CHAR_ENCODING_NONE:
1605 /* let's assume it's UTF-8 without the XML decl */
1606 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1607 return(0);
1608 case XML_CHAR_ENCODING_UTF8:
1609 /* default encoding, no conversion should be needed */
1610 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001611
1612 /*
1613 * Errata on XML-1.0 June 20 2001
1614 * Specific handling of the Byte Order Mark for
1615 * UTF-8
1616 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001617 if ((ctxt->input != NULL) &&
1618 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001619 (ctxt->input->cur[1] == 0xBB) &&
1620 (ctxt->input->cur[2] == 0xBF)) {
1621 ctxt->input->cur += 3;
1622 }
Owen Taylor3473f882001-02-23 17:55:21 +00001623 return(0);
Daniel Veillard2dcb9372003-07-16 21:18:19 +00001624 case XML_CHAR_ENCODING_UTF16LE:
1625 case XML_CHAR_ENCODING_UTF16BE:
1626 /*The raw input characters are encoded
1627 *in UTF-16. As we expect this function
1628 *to be called after xmlCharEncInFunc, we expect
1629 *ctxt->input->cur to contain UTF-8 encoded characters.
1630 *So the raw UTF16 Byte Order Mark
1631 *has also been converted into
1632 *an UTF-8 BOM. Let's skip that BOM.
1633 */
1634 if ((ctxt->input != NULL) &&
1635 (ctxt->input->cur[0] == 0xEF) &&
1636 (ctxt->input->cur[1] == 0xBB) &&
1637 (ctxt->input->cur[2] == 0xBF)) {
1638 ctxt->input->cur += 3;
1639 }
1640 break ;
Owen Taylor3473f882001-02-23 17:55:21 +00001641 default:
1642 break;
1643 }
1644 handler = xmlGetCharEncodingHandler(enc);
1645 if (handler == NULL) {
1646 /*
1647 * Default handlers.
1648 */
1649 switch (enc) {
1650 case XML_CHAR_ENCODING_ERROR:
1651 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1653 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001656 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1657 break;
1658 case XML_CHAR_ENCODING_NONE:
1659 /* let's assume it's UTF-8 without the XML decl */
1660 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1661 return(0);
1662 case XML_CHAR_ENCODING_UTF8:
1663 case XML_CHAR_ENCODING_ASCII:
1664 /* default encoding, no conversion should be needed */
1665 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1666 return(0);
1667 case XML_CHAR_ENCODING_UTF16LE:
1668 break;
1669 case XML_CHAR_ENCODING_UTF16BE:
1670 break;
1671 case XML_CHAR_ENCODING_UCS4LE:
1672 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1674 ctxt->sax->error(ctxt->userData,
1675 "char encoding USC4 little endian not supported\n");
1676 break;
1677 case XML_CHAR_ENCODING_UCS4BE:
1678 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1680 ctxt->sax->error(ctxt->userData,
1681 "char encoding USC4 big endian not supported\n");
1682 break;
1683 case XML_CHAR_ENCODING_EBCDIC:
1684 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1686 ctxt->sax->error(ctxt->userData,
1687 "char encoding EBCDIC not supported\n");
1688 break;
1689 case XML_CHAR_ENCODING_UCS4_2143:
1690 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1692 ctxt->sax->error(ctxt->userData,
1693 "char encoding UCS4 2143 not supported\n");
1694 break;
1695 case XML_CHAR_ENCODING_UCS4_3412:
1696 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698 ctxt->sax->error(ctxt->userData,
1699 "char encoding UCS4 3412 not supported\n");
1700 break;
1701 case XML_CHAR_ENCODING_UCS2:
1702 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1704 ctxt->sax->error(ctxt->userData,
1705 "char encoding UCS2 not supported\n");
1706 break;
1707 case XML_CHAR_ENCODING_8859_1:
1708 case XML_CHAR_ENCODING_8859_2:
1709 case XML_CHAR_ENCODING_8859_3:
1710 case XML_CHAR_ENCODING_8859_4:
1711 case XML_CHAR_ENCODING_8859_5:
1712 case XML_CHAR_ENCODING_8859_6:
1713 case XML_CHAR_ENCODING_8859_7:
1714 case XML_CHAR_ENCODING_8859_8:
1715 case XML_CHAR_ENCODING_8859_9:
1716 /*
1717 * We used to keep the internal content in the
1718 * document encoding however this turns being unmaintainable
1719 * So xmlGetCharEncodingHandler() will return non-null
1720 * values for this now.
1721 */
1722 if ((ctxt->inputNr == 1) &&
1723 (ctxt->encoding == NULL) &&
1724 (ctxt->input->encoding != NULL)) {
1725 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1726 }
1727 ctxt->charset = enc;
1728 return(0);
1729 case XML_CHAR_ENCODING_2022_JP:
1730 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1732 ctxt->sax->error(ctxt->userData,
1733 "char encoding ISO-2022-JPnot supported\n");
1734 break;
1735 case XML_CHAR_ENCODING_SHIFT_JIS:
1736 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1738 ctxt->sax->error(ctxt->userData,
1739 "char encoding Shift_JIS not supported\n");
1740 break;
1741 case XML_CHAR_ENCODING_EUC_JP:
1742 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1744 ctxt->sax->error(ctxt->userData,
1745 "char encoding EUC-JPnot supported\n");
1746 break;
1747 }
1748 }
1749 if (handler == NULL)
1750 return(-1);
1751 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1752 return(xmlSwitchToEncoding(ctxt, handler));
1753}
1754
1755/**
1756 * xmlSwitchToEncoding:
1757 * @ctxt: the parser context
1758 * @handler: the encoding handler
1759 *
1760 * change the input functions when discovering the character encoding
1761 * of a given entity.
1762 *
1763 * Returns 0 in case of success, -1 otherwise
1764 */
1765int
1766xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1767{
1768 int nbchars;
1769
1770 if (handler != NULL) {
1771 if (ctxt->input != NULL) {
1772 if (ctxt->input->buf != NULL) {
1773 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001774 /*
1775 * Check in case the auto encoding detetection triggered
1776 * in already.
1777 */
Owen Taylor3473f882001-02-23 17:55:21 +00001778 if (ctxt->input->buf->encoder == handler)
1779 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001780
1781 /*
1782 * "UTF-16" can be used for both LE and BE
Daniel Veillard878eab02002-02-19 13:46:09 +00001783 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1784 BAD_CAST "UTF-16", 6)) &&
1785 (!xmlStrncmp(BAD_CAST handler->name,
1786 BAD_CAST "UTF-16", 6))) {
1787 return(0);
1788 }
Daniel Veillarda6874ca2003-07-29 16:47:24 +00001789 */
Daniel Veillard878eab02002-02-19 13:46:09 +00001790
Owen Taylor3473f882001-02-23 17:55:21 +00001791 /*
1792 * Note: this is a bit dangerous, but that's what it
1793 * takes to use nearly compatible signature for different
1794 * encodings.
1795 */
1796 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1797 ctxt->input->buf->encoder = handler;
1798 return(0);
1799 }
1800 ctxt->input->buf->encoder = handler;
1801
1802 /*
1803 * Is there already some content down the pipe to convert ?
1804 */
1805 if ((ctxt->input->buf->buffer != NULL) &&
1806 (ctxt->input->buf->buffer->use > 0)) {
1807 int processed;
1808
1809 /*
1810 * Specific handling of the Byte Order Mark for
1811 * UTF-16
1812 */
1813 if ((handler->name != NULL) &&
1814 (!strcmp(handler->name, "UTF-16LE")) &&
1815 (ctxt->input->cur[0] == 0xFF) &&
1816 (ctxt->input->cur[1] == 0xFE)) {
1817 ctxt->input->cur += 2;
1818 }
1819 if ((handler->name != NULL) &&
1820 (!strcmp(handler->name, "UTF-16BE")) &&
1821 (ctxt->input->cur[0] == 0xFE) &&
1822 (ctxt->input->cur[1] == 0xFF)) {
1823 ctxt->input->cur += 2;
1824 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001825 /*
1826 * Errata on XML-1.0 June 20 2001
1827 * Specific handling of the Byte Order Mark for
1828 * UTF-8
1829 */
1830 if ((handler->name != NULL) &&
1831 (!strcmp(handler->name, "UTF-8")) &&
1832 (ctxt->input->cur[0] == 0xEF) &&
1833 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001834 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001835 ctxt->input->cur += 3;
1836 }
Owen Taylor3473f882001-02-23 17:55:21 +00001837
1838 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001839 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001840 * Move it as the raw buffer and create a new input buffer
1841 */
1842 processed = ctxt->input->cur - ctxt->input->base;
1843 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1844 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1845 ctxt->input->buf->buffer = xmlBufferCreate();
1846
1847 if (ctxt->html) {
1848 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001849 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001850 */
1851 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1852 ctxt->input->buf->buffer,
1853 ctxt->input->buf->raw);
1854 } else {
1855 /*
1856 * convert just enough to get
1857 * '<?xml version="1.0" encoding="xxx"?>'
1858 * parsed with the autodetected encoding
1859 * into the parser reading buffer.
1860 */
1861 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1862 ctxt->input->buf->buffer,
1863 ctxt->input->buf->raw);
1864 }
1865 if (nbchars < 0) {
1866 xmlGenericError(xmlGenericErrorContext,
1867 "xmlSwitchToEncoding: encoder error\n");
1868 return(-1);
1869 }
1870 ctxt->input->base =
1871 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001872 ctxt->input->end =
1873 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001874
1875 }
1876 return(0);
1877 } else {
1878 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1879 /*
1880 * When parsing a static memory array one must know the
1881 * size to be able to convert the buffer.
1882 */
1883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1884 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001885 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001886 return(-1);
1887 } else {
1888 int processed;
1889
1890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001891 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001892 * Move it as the raw buffer and create a new input buffer
1893 */
1894 processed = ctxt->input->cur - ctxt->input->base;
1895
1896 ctxt->input->buf->raw = xmlBufferCreate();
1897 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1898 ctxt->input->length - processed);
1899 ctxt->input->buf->buffer = xmlBufferCreate();
1900
1901 /*
1902 * convert as much as possible of the raw input
1903 * to the parser reading buffer.
1904 */
1905 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1906 ctxt->input->buf->buffer,
1907 ctxt->input->buf->raw);
1908 if (nbchars < 0) {
1909 xmlGenericError(xmlGenericErrorContext,
1910 "xmlSwitchToEncoding: encoder error\n");
1911 return(-1);
1912 }
1913
1914 /*
1915 * Conversion succeeded, get rid of the old buffer
1916 */
1917 if ((ctxt->input->free != NULL) &&
1918 (ctxt->input->base != NULL))
1919 ctxt->input->free((xmlChar *) ctxt->input->base);
1920 ctxt->input->base =
1921 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001922 ctxt->input->end =
1923 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001924 }
1925 }
1926 } else {
1927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1928 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001929 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 return(-1);
1931 }
1932 /*
1933 * The parsing is now done in UTF8 natively
1934 */
1935 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1936 } else
1937 return(-1);
1938 return(0);
1939
1940}
1941
/************************************************************************
 *									*
 *	Convenience functions to handle entities processing		*
 *									*
 ************************************************************************/
1947
1948/**
1949 * xmlFreeInputStream:
1950 * @input: an xmlParserInputPtr
1951 *
1952 * Free up an input stream.
1953 */
1954void
1955xmlFreeInputStream(xmlParserInputPtr input) {
1956 if (input == NULL) return;
1957
1958 if (input->filename != NULL) xmlFree((char *) input->filename);
1959 if (input->directory != NULL) xmlFree((char *) input->directory);
1960 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1961 if (input->version != NULL) xmlFree((char *) input->version);
1962 if ((input->free != NULL) && (input->base != NULL))
1963 input->free((xmlChar *) input->base);
1964 if (input->buf != NULL)
1965 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 xmlFree(input);
1967}
1968
1969/**
1970 * xmlNewInputStream:
1971 * @ctxt: an XML parser context
1972 *
1973 * Create a new input stream structure
1974 * Returns the new input stream or NULL
1975 */
1976xmlParserInputPtr
1977xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1978 xmlParserInputPtr input;
1979
1980 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1981 if (input == NULL) {
1982 if (ctxt != NULL) {
1983 ctxt->errNo = XML_ERR_NO_MEMORY;
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "malloc: couldn't allocate a new input stream\n");
1987 ctxt->errNo = XML_ERR_NO_MEMORY;
1988 }
1989 return(NULL);
1990 }
1991 memset(input, 0, sizeof(xmlParserInput));
1992 input->line = 1;
1993 input->col = 1;
1994 input->standalone = -1;
1995 return(input);
1996}
1997
1998/**
1999 * xmlNewIOInputStream:
2000 * @ctxt: an XML parser context
2001 * @input: an I/O Input
2002 * @enc: the charset encoding if known
2003 *
2004 * Create a new input stream structure encapsulating the @input into
2005 * a stream suitable for the parser.
2006 *
2007 * Returns the new input stream or NULL
2008 */
2009xmlParserInputPtr
2010xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
2011 xmlCharEncoding enc) {
2012 xmlParserInputPtr inputStream;
2013
2014 if (xmlParserDebugEntities)
2015 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
2016 inputStream = xmlNewInputStream(ctxt);
2017 if (inputStream == NULL) {
2018 return(NULL);
2019 }
2020 inputStream->filename = NULL;
2021 inputStream->buf = input;
2022 inputStream->base = inputStream->buf->buffer->content;
2023 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002024 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002025 if (enc != XML_CHAR_ENCODING_NONE) {
2026 xmlSwitchEncoding(ctxt, enc);
2027 }
2028
2029 return(inputStream);
2030}
2031
2032/**
2033 * xmlNewEntityInputStream:
2034 * @ctxt: an XML parser context
2035 * @entity: an Entity pointer
2036 *
2037 * Create a new input stream based on an xmlEntityPtr
2038 *
2039 * Returns the new input stream or NULL
2040 */
2041xmlParserInputPtr
2042xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2043 xmlParserInputPtr input;
2044
2045 if (entity == NULL) {
2046 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2048 ctxt->sax->error(ctxt->userData,
2049 "internal: xmlNewEntityInputStream entity = NULL\n");
2050 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2051 return(NULL);
2052 }
2053 if (xmlParserDebugEntities)
2054 xmlGenericError(xmlGenericErrorContext,
2055 "new input from entity: %s\n", entity->name);
2056 if (entity->content == NULL) {
2057 switch (entity->etype) {
2058 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2059 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2061 ctxt->sax->error(ctxt->userData,
2062 "xmlNewEntityInputStream unparsed entity !\n");
2063 break;
2064 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2065 case XML_EXTERNAL_PARAMETER_ENTITY:
2066 return(xmlLoadExternalEntity((char *) entity->URI,
2067 (char *) entity->ExternalID, ctxt));
2068 case XML_INTERNAL_GENERAL_ENTITY:
2069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2070 ctxt->sax->error(ctxt->userData,
2071 "Internal entity %s without content !\n", entity->name);
2072 break;
2073 case XML_INTERNAL_PARAMETER_ENTITY:
2074 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2076 ctxt->sax->error(ctxt->userData,
2077 "Internal parameter entity %s without content !\n", entity->name);
2078 break;
2079 case XML_INTERNAL_PREDEFINED_ENTITY:
2080 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2082 ctxt->sax->error(ctxt->userData,
2083 "Predefined entity %s without content !\n", entity->name);
2084 break;
2085 }
2086 return(NULL);
2087 }
2088 input = xmlNewInputStream(ctxt);
2089 if (input == NULL) {
2090 return(NULL);
2091 }
2092 input->filename = (char *) entity->URI;
2093 input->base = entity->content;
2094 input->cur = entity->content;
2095 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002096 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002097 return(input);
2098}
2099
2100/**
2101 * xmlNewStringInputStream:
2102 * @ctxt: an XML parser context
2103 * @buffer: an memory buffer
2104 *
2105 * Create a new input stream based on a memory buffer.
2106 * Returns the new input stream
2107 */
2108xmlParserInputPtr
2109xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2110 xmlParserInputPtr input;
2111
2112 if (buffer == NULL) {
2113 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2115 ctxt->sax->error(ctxt->userData,
2116 "internal: xmlNewStringInputStream string = NULL\n");
2117 return(NULL);
2118 }
2119 if (xmlParserDebugEntities)
2120 xmlGenericError(xmlGenericErrorContext,
2121 "new fixed input: %.30s\n", buffer);
2122 input = xmlNewInputStream(ctxt);
2123 if (input == NULL) {
2124 return(NULL);
2125 }
2126 input->base = buffer;
2127 input->cur = buffer;
2128 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002129 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002130 return(input);
2131}
2132
2133/**
2134 * xmlNewInputFromFile:
2135 * @ctxt: an XML parser context
2136 * @filename: the filename to use as entity
2137 *
2138 * Create a new input stream based on a file.
2139 *
2140 * Returns the new input stream or NULL in case of error
2141 */
2142xmlParserInputPtr
2143xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2144 xmlParserInputBufferPtr buf;
2145 xmlParserInputPtr inputStream;
2146 char *directory = NULL;
2147 xmlChar *URI = NULL;
2148
2149 if (xmlParserDebugEntities)
2150 xmlGenericError(xmlGenericErrorContext,
2151 "new input from file: %s\n", filename);
2152 if (ctxt == NULL) return(NULL);
2153 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2154 if (buf == NULL)
2155 return(NULL);
2156
2157 URI = xmlStrdup((xmlChar *) filename);
2158 directory = xmlParserGetDirectory((const char *) URI);
2159
2160 inputStream = xmlNewInputStream(ctxt);
2161 if (inputStream == NULL) {
2162 if (directory != NULL) xmlFree((char *) directory);
2163 if (URI != NULL) xmlFree((char *) URI);
2164 return(NULL);
2165 }
2166
2167 inputStream->filename = (const char *) URI;
2168 inputStream->directory = directory;
2169 inputStream->buf = buf;
2170
2171 inputStream->base = inputStream->buf->buffer->content;
2172 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002173 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002174 if ((ctxt->directory == NULL) && (directory != NULL))
2175 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2176 return(inputStream);
2177}
2178
/************************************************************************
 *									*
 *	Convenience functions to handle parser contexts			*
 *									*
 ************************************************************************/
2184
2185/**
2186 * xmlInitParserCtxt:
2187 * @ctxt: an XML parser context
2188 *
2189 * Initialize a parser context
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002190 *
2191 * Returns 0 in case of success and -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00002192 */
2193
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002194int
Owen Taylor3473f882001-02-23 17:55:21 +00002195xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2196{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002197 if(ctxt==NULL) {
2198 xmlGenericError(xmlGenericErrorContext,
2199 "xmlInitParserCtxt: NULL context given\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002200 return(-1);
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002201 }
2202
Owen Taylor3473f882001-02-23 17:55:21 +00002203 xmlDefaultSAXHandlerInit();
2204
William M. Brack8b2c7f12002-11-22 05:07:29 +00002205 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2206 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002207 xmlGenericError(xmlGenericErrorContext,
2208 "xmlInitParserCtxt: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002209 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 }
2211 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002212 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002213
2214 /* Allocate the Input stack */
2215 ctxt->inputTab = (xmlParserInputPtr *)
2216 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2217 if (ctxt->inputTab == NULL) {
2218 xmlGenericError(xmlGenericErrorContext,
2219 "xmlInitParserCtxt: out of memory\n");
2220 ctxt->inputNr = 0;
2221 ctxt->inputMax = 0;
2222 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002223 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002224 }
2225 ctxt->inputNr = 0;
2226 ctxt->inputMax = 5;
2227 ctxt->input = NULL;
2228
2229 ctxt->version = NULL;
2230 ctxt->encoding = NULL;
2231 ctxt->standalone = -1;
2232 ctxt->hasExternalSubset = 0;
2233 ctxt->hasPErefs = 0;
2234 ctxt->html = 0;
2235 ctxt->external = 0;
2236 ctxt->instate = XML_PARSER_START;
2237 ctxt->token = 0;
2238 ctxt->directory = NULL;
2239
2240 /* Allocate the Node stack */
2241 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2242 if (ctxt->nodeTab == NULL) {
2243 xmlGenericError(xmlGenericErrorContext,
2244 "xmlInitParserCtxt: out of memory\n");
2245 ctxt->nodeNr = 0;
2246 ctxt->nodeMax = 0;
2247 ctxt->node = NULL;
2248 ctxt->inputNr = 0;
2249 ctxt->inputMax = 0;
2250 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002251 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002252 }
2253 ctxt->nodeNr = 0;
2254 ctxt->nodeMax = 10;
2255 ctxt->node = NULL;
2256
2257 /* Allocate the Name stack */
2258 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2259 if (ctxt->nameTab == NULL) {
2260 xmlGenericError(xmlGenericErrorContext,
2261 "xmlInitParserCtxt: out of memory\n");
2262 ctxt->nodeNr = 0;
2263 ctxt->nodeMax = 0;
2264 ctxt->node = NULL;
2265 ctxt->inputNr = 0;
2266 ctxt->inputMax = 0;
2267 ctxt->input = NULL;
2268 ctxt->nameNr = 0;
2269 ctxt->nameMax = 0;
2270 ctxt->name = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002271 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002272 }
2273 ctxt->nameNr = 0;
2274 ctxt->nameMax = 10;
2275 ctxt->name = NULL;
2276
2277 /* Allocate the space stack */
2278 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2279 if (ctxt->spaceTab == NULL) {
2280 xmlGenericError(xmlGenericErrorContext,
2281 "xmlInitParserCtxt: out of memory\n");
2282 ctxt->nodeNr = 0;
2283 ctxt->nodeMax = 0;
2284 ctxt->node = NULL;
2285 ctxt->inputNr = 0;
2286 ctxt->inputMax = 0;
2287 ctxt->input = NULL;
2288 ctxt->nameNr = 0;
2289 ctxt->nameMax = 0;
2290 ctxt->name = NULL;
2291 ctxt->spaceNr = 0;
2292 ctxt->spaceMax = 0;
2293 ctxt->space = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002294 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002295 }
2296 ctxt->spaceNr = 1;
2297 ctxt->spaceMax = 10;
2298 ctxt->spaceTab[0] = -1;
2299 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002300 ctxt->userData = ctxt;
2301 ctxt->myDoc = NULL;
2302 ctxt->wellFormed = 1;
2303 ctxt->valid = 1;
2304 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2305 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2306 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002307 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002308 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002309 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002310 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002311
Owen Taylor3473f882001-02-23 17:55:21 +00002312 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002313 ctxt->vctxt.error = xmlParserValidityError;
2314 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002315 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002316 if (xmlGetWarningsDefaultValue == 0)
2317 ctxt->vctxt.warning = NULL;
2318 else
2319 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002320 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002321 }
2322 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2323 ctxt->record_info = 0;
2324 ctxt->nbChars = 0;
2325 ctxt->checkIndex = 0;
2326 ctxt->inSubset = 0;
2327 ctxt->errNo = XML_ERR_OK;
2328 ctxt->depth = 0;
2329 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002330 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 xmlInitNodeInfoSeq(&ctxt->node_seq);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002332 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002333}
2334
2335/**
2336 * xmlFreeParserCtxt:
2337 * @ctxt: an XML parser context
2338 *
2339 * Free all the memory used by a parser context. However the parsed
2340 * document in ctxt->myDoc is not freed.
2341 */
2342
2343void
2344xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2345{
2346 xmlParserInputPtr input;
2347 xmlChar *oldname;
2348
2349 if (ctxt == NULL) return;
2350
2351 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2352 xmlFreeInputStream(input);
2353 }
2354 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2355 xmlFree(oldname);
2356 }
2357 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2358 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2359 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2360 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2361 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2362 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2363 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2364 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2365 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002366 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2367 xmlFree(ctxt->sax);
2368 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002369 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002370#ifdef LIBXML_CATALOG_ENABLED
2371 if (ctxt->catalogs != NULL)
2372 xmlCatalogFreeLocal(ctxt->catalogs);
2373#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002374 xmlFree(ctxt);
2375}
2376
2377/**
2378 * xmlNewParserCtxt:
2379 *
2380 * Allocate and initialize a new parser context.
2381 *
2382 * Returns the xmlParserCtxtPtr or NULL
2383 */
2384
2385xmlParserCtxtPtr
2386xmlNewParserCtxt()
2387{
2388 xmlParserCtxtPtr ctxt;
2389
2390 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2391 if (ctxt == NULL) {
2392 xmlGenericError(xmlGenericErrorContext,
2393 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002394 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002395 return(NULL);
2396 }
2397 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002398 if (xmlInitParserCtxt(ctxt) < 0) {
2399 xmlFreeParserCtxt(ctxt);
2400 return(NULL);
2401 }
Owen Taylor3473f882001-02-23 17:55:21 +00002402 return(ctxt);
2403}
2404
/************************************************************************
 *									*
 *		Handling of node information				*
 *									*
 ************************************************************************/
2410
2411/**
2412 * xmlClearParserCtxt:
2413 * @ctxt: an XML parser context
2414 *
2415 * Clear (release owned resources) and reinitialize a parser context
2416 */
2417
2418void
2419xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2420{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002421 if (ctxt==NULL)
2422 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002423 xmlClearNodeInfoSeq(&ctxt->node_seq);
2424 xmlInitParserCtxt(ctxt);
2425}
2426
2427/**
2428 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002429 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002430 * @node: an XML node within the tree
2431 *
2432 * Find the parser node info struct for a given node
2433 *
2434 * Returns an xmlParserNodeInfo block pointer or NULL
2435 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002436const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2437 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002438{
2439 unsigned long pos;
2440
2441 /* Find position where node should be at */
2442 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002443 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002444 return &ctx->node_seq.buffer[pos];
2445 else
2446 return NULL;
2447}
2448
2449
2450/**
2451 * xmlInitNodeInfoSeq:
2452 * @seq: a node info sequence pointer
2453 *
2454 * -- Initialize (set to initial state) node info sequence
2455 */
2456void
2457xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2458{
2459 seq->length = 0;
2460 seq->maximum = 0;
2461 seq->buffer = NULL;
2462}
2463
2464/**
2465 * xmlClearNodeInfoSeq:
2466 * @seq: a node info sequence pointer
2467 *
2468 * -- Clear (release memory and reinitialize) node
2469 * info sequence
2470 */
2471void
2472xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2473{
2474 if ( seq->buffer != NULL )
2475 xmlFree(seq->buffer);
2476 xmlInitNodeInfoSeq(seq);
2477}
2478
2479
2480/**
2481 * xmlParserFindNodeInfoIndex:
2482 * @seq: a node info sequence pointer
2483 * @node: an XML node pointer
2484 *
2485 *
2486 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2487 * the given node is or should be at in a sorted sequence
2488 *
2489 * Returns a long indicating the position of the record
2490 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002491unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2492 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002493{
2494 unsigned long upper, lower, middle;
2495 int found = 0;
2496
2497 /* Do a binary search for the key */
2498 lower = 1;
2499 upper = seq->length;
2500 middle = 0;
2501 while ( lower <= upper && !found) {
2502 middle = lower + (upper - lower) / 2;
2503 if ( node == seq->buffer[middle - 1].node )
2504 found = 1;
2505 else if ( node < seq->buffer[middle - 1].node )
2506 upper = middle - 1;
2507 else
2508 lower = middle + 1;
2509 }
2510
2511 /* Return position */
2512 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2513 return middle;
2514 else
2515 return middle - 1;
2516}
2517
2518
2519/**
2520 * xmlParserAddNodeInfo:
2521 * @ctxt: an XML parser context
2522 * @info: a node info sequence pointer
2523 *
2524 * Insert node info record into the sorted sequence
2525 */
2526void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002527xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002528 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002529{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002530 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002531
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002532 /* Find pos and check to see if node is already in the sequence */
2533 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2534 info->node);
2535 if (pos < ctxt->node_seq.length
2536 && ctxt->node_seq.buffer[pos].node == info->node) {
2537 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002538 }
2539
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002540 /* Otherwise, we need to add new node to buffer */
2541 else {
2542 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2543 xmlParserNodeInfo *tmp_buffer;
2544 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002545
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002546 if (ctxt->node_seq.maximum == 0)
2547 ctxt->node_seq.maximum = 2;
2548 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2549 (2 * ctxt->node_seq.maximum));
2550
2551 if (ctxt->node_seq.buffer == NULL)
Daniel Veillardc4f65ab2003-04-21 23:07:45 +00002552 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002553 else
2554 tmp_buffer =
2555 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2556 byte_size);
2557
2558 if (tmp_buffer == NULL) {
2559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2560 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2561 ctxt->errNo = XML_ERR_NO_MEMORY;
2562 return;
2563 }
2564 ctxt->node_seq.buffer = tmp_buffer;
2565 ctxt->node_seq.maximum *= 2;
2566 }
2567
2568 /* If position is not at end, move elements out of the way */
2569 if (pos != ctxt->node_seq.length) {
2570 unsigned long i;
2571
2572 for (i = ctxt->node_seq.length; i > pos; i--)
2573 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2574 }
2575
2576 /* Copy element and increase length */
2577 ctxt->node_seq.buffer[pos] = *info;
2578 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002579 }
Owen Taylor3473f882001-02-23 17:55:21 +00002580}
2581
/************************************************************************
 *                                                                      *
 *              Default settings                                        *
 *                                                                      *
 ************************************************************************/
2587/**
2588 * xmlPedanticParserDefault:
2589 * @val: int 0 or 1
2590 *
2591 * Set and return the previous value for enabling pedantic warnings.
2592 *
2593 * Returns the last value for 0 for no substitution, 1 for substitution.
2594 */
2595
2596int
2597xmlPedanticParserDefault(int val) {
2598 int old = xmlPedanticParserDefaultValue;
2599
2600 xmlPedanticParserDefaultValue = val;
2601 return(old);
2602}
2603
2604/**
2605 * xmlLineNumbersDefault:
2606 * @val: int 0 or 1
2607 *
2608 * Set and return the previous value for enabling line numbers in elements
2609 * contents. This may break on old application and is turned off by default.
2610 *
2611 * Returns the last value for 0 for no substitution, 1 for substitution.
2612 */
2613
2614int
2615xmlLineNumbersDefault(int val) {
2616 int old = xmlLineNumbersDefaultValue;
2617
2618 xmlLineNumbersDefaultValue = val;
2619 return(old);
2620}
2621
2622/**
2623 * xmlSubstituteEntitiesDefault:
2624 * @val: int 0 or 1
2625 *
2626 * Set and return the previous value for default entity support.
2627 * Initially the parser always keep entity references instead of substituting
2628 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002629 * default parser behavior
2630 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002631 * file basis.
2632 *
2633 * Returns the last value for 0 for no substitution, 1 for substitution.
2634 */
2635
2636int
2637xmlSubstituteEntitiesDefault(int val) {
2638 int old = xmlSubstituteEntitiesDefaultValue;
2639
2640 xmlSubstituteEntitiesDefaultValue = val;
2641 return(old);
2642}
2643
2644/**
2645 * xmlKeepBlanksDefault:
2646 * @val: int 0 or 1
2647 *
2648 * Set and return the previous value for default blanks text nodes support.
2649 * The 1.x version of the parser used an heuristic to try to detect
2650 * ignorable white spaces. As a result the SAX callback was generating
2651 * ignorableWhitespace() callbacks instead of characters() one, and when
2652 * using the DOM output text nodes containing those blanks were not generated.
2653 * The 2.x and later version will switch to the XML standard way and
2654 * ignorableWhitespace() are only generated when running the parser in
2655 * validating mode and when the current element doesn't allow CDATA or
2656 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002657 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002658 * on 1.X libs and to switch back to the old mode for compatibility when
2659 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2660 * by using xmlIsBlankNode() commodity function to detect the "empty"
2661 * nodes generated.
2662 * This value also affect autogeneration of indentation when saving code
2663 * if blanks sections are kept, indentation is not generated.
2664 *
2665 * Returns the last value for 0 for no substitution, 1 for substitution.
2666 */
2667
2668int
2669xmlKeepBlanksDefault(int val) {
2670 int old = xmlKeepBlanksDefaultValue;
2671
2672 xmlKeepBlanksDefaultValue = val;
2673 xmlIndentTreeOutput = !val;
2674 return(old);
2675}
2676
2677/************************************************************************
2678 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002679 * Deprecated functions kept for compatibility *
2680 * *
2681 ************************************************************************/
2682
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002683/**
2684 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002685 * @lang: pointer to the string value
2686 *
2687 * Checks that the value conforms to the LanguageID production:
2688 *
2689 * NOTE: this is somewhat deprecated, those productions were removed from
2690 * the XML Second edition.
2691 *
2692 * [33] LanguageID ::= Langcode ('-' Subcode)*
2693 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2694 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2695 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2696 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2697 * [38] Subcode ::= ([a-z] | [A-Z])+
2698 *
2699 * Returns 1 if correct 0 otherwise
2700 **/
2701int
2702xmlCheckLanguageID(const xmlChar *lang) {
2703 const xmlChar *cur = lang;
2704
2705 if (cur == NULL)
2706 return(0);
2707 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2708 ((cur[0] == 'I') && (cur[1] == '-'))) {
2709 /*
2710 * IANA code
2711 */
2712 cur += 2;
2713 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2714 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2715 cur++;
2716 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2717 ((cur[0] == 'X') && (cur[1] == '-'))) {
2718 /*
2719 * User code
2720 */
2721 cur += 2;
2722 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2723 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2724 cur++;
2725 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2726 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2727 /*
2728 * ISO639
2729 */
2730 cur++;
2731 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2732 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2733 cur++;
2734 else
2735 return(0);
2736 } else
2737 return(0);
2738 while (cur[0] != 0) { /* non input consuming */
2739 if (cur[0] != '-')
2740 return(0);
2741 cur++;
2742 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2743 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2744 cur++;
2745 else
2746 return(0);
2747 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2748 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2749 cur++;
2750 }
2751 return(1);
2752}
2753
2754/**
2755 * xmlDecodeEntities:
2756 * @ctxt: the parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002757 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002758 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
Owen Taylor3473f882001-02-23 17:55:21 +00002759 * @end: an end marker xmlChar, 0 if none
2760 * @end2: an end marker xmlChar, 0 if none
2761 * @end3: an end marker xmlChar, 0 if none
2762 *
2763 * This function is deprecated, we now always process entities content
2764 * through xmlStringDecodeEntities
2765 *
2766 * TODO: remove it in next major release.
2767 *
2768 * [67] Reference ::= EntityRef | CharRef
2769 *
2770 * [69] PEReference ::= '%' Name ';'
2771 *
2772 * Returns A newly allocated string with the substitution done. The caller
2773 * must deallocate it !
2774 */
2775xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002776xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2777 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002778#if 0
2779 xmlChar *buffer = NULL;
2780 unsigned int buffer_size = 0;
2781 unsigned int nbchars = 0;
2782
2783 xmlChar *current = NULL;
2784 xmlEntityPtr ent;
2785 unsigned int max = (unsigned int) len;
2786 int c,l;
2787#endif
2788
2789 static int deprecated = 0;
2790 if (!deprecated) {
2791 xmlGenericError(xmlGenericErrorContext,
2792 "xmlDecodeEntities() deprecated function reached\n");
2793 deprecated = 1;
2794 }
2795
2796#if 0
2797 if (ctxt->depth > 40) {
2798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2799 ctxt->sax->error(ctxt->userData,
2800 "Detected entity reference loop\n");
2801 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002802 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002803 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2804 return(NULL);
2805 }
2806
2807 /*
2808 * allocate a translation buffer.
2809 */
2810 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2811 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2812 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002813 xmlGenericError(xmlGenericErrorContext,
2814 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002815 return(NULL);
2816 }
2817
2818 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002819 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002820 */
2821 GROW;
2822 c = CUR_CHAR(l);
2823 while ((nbchars < max) && (c != end) && /* NOTUSED */
2824 (c != end2) && (c != end3)) {
2825 GROW;
2826 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002827 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002828 int val = xmlParseCharRef(ctxt);
2829 COPY_BUF(0,buffer,nbchars,val);
2830 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002831 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002832 (what & XML_SUBSTITUTE_REF)) {
2833 if (xmlParserDebugEntities)
2834 xmlGenericError(xmlGenericErrorContext,
2835 "decoding Entity Reference\n");
2836 ent = xmlParseEntityRef(ctxt);
2837 if ((ent != NULL) &&
2838 (ctxt->replaceEntities != 0)) {
2839 current = ent->content;
2840 while (*current != 0) { /* non input consuming loop */
2841 buffer[nbchars++] = *current++;
2842 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2843 growBuffer(buffer);
2844 }
2845 }
2846 } else if (ent != NULL) {
2847 const xmlChar *cur = ent->name;
2848
2849 buffer[nbchars++] = '&';
2850 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2851 growBuffer(buffer);
2852 }
2853 while (*cur != 0) { /* non input consuming loop */
2854 buffer[nbchars++] = *cur++;
2855 }
2856 buffer[nbchars++] = ';';
2857 }
2858 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2859 /*
2860 * a PEReference induce to switch the entity flow,
2861 * we break here to flush the current set of chars
2862 * parsed if any. We will be called back later.
2863 */
2864 if (xmlParserDebugEntities)
2865 xmlGenericError(xmlGenericErrorContext,
2866 "decoding PE Reference\n");
2867 if (nbchars != 0) break;
2868
2869 xmlParsePEReference(ctxt);
2870
2871 /*
2872 * Pop-up of finished entities.
2873 */
2874 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2875 xmlPopInput(ctxt);
2876
2877 break;
2878 } else {
2879 COPY_BUF(l,buffer,nbchars,c);
2880 NEXTL(l);
2881 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2882 growBuffer(buffer);
2883 }
2884 }
2885 c = CUR_CHAR(l);
2886 }
2887 buffer[nbchars++] = 0;
2888 return(buffer);
2889#endif
2890 return(NULL);
2891}
2892
2893/**
2894 * xmlNamespaceParseNCName:
2895 * @ctxt: an XML parser context
2896 *
2897 * parse an XML namespace name.
2898 *
2899 * TODO: this seems not in use anymore, the namespace handling is done on
2900 * top of the SAX interfaces, i.e. not on raw input.
2901 *
2902 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2903 *
2904 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2905 * CombiningChar | Extender
2906 *
2907 * Returns the namespace name or NULL
2908 */
2909
2910xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002911xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002912#if 0
2913 xmlChar buf[XML_MAX_NAMELEN + 5];
2914 int len = 0, l;
2915 int cur = CUR_CHAR(l);
2916#endif
2917
2918 static int deprecated = 0;
2919 if (!deprecated) {
2920 xmlGenericError(xmlGenericErrorContext,
2921 "xmlNamespaceParseNCName() deprecated function reached\n");
2922 deprecated = 1;
2923 }
2924
2925#if 0
2926 /* load first the value of the char !!! */
2927 GROW;
2928 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2929
2930xmlGenericError(xmlGenericErrorContext,
2931 "xmlNamespaceParseNCName: reached loop 3\n");
2932 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2933 (cur == '.') || (cur == '-') ||
2934 (cur == '_') ||
2935 (IS_COMBINING(cur)) ||
2936 (IS_EXTENDER(cur))) {
2937 COPY_BUF(l,buf,len,cur);
2938 NEXTL(l);
2939 cur = CUR_CHAR(l);
2940 if (len >= XML_MAX_NAMELEN) {
2941 xmlGenericError(xmlGenericErrorContext,
2942 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2943 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2944 (cur == '.') || (cur == '-') ||
2945 (cur == '_') ||
2946 (IS_COMBINING(cur)) ||
2947 (IS_EXTENDER(cur))) {
2948 NEXTL(l);
2949 cur = CUR_CHAR(l);
2950 }
2951 break;
2952 }
2953 }
2954 return(xmlStrndup(buf, len));
2955#endif
2956 return(NULL);
2957}
2958
2959/**
2960 * xmlNamespaceParseQName:
2961 * @ctxt: an XML parser context
2962 * @prefix: a xmlChar **
2963 *
2964 * TODO: this seems not in use anymore, the namespace handling is done on
2965 * top of the SAX interfaces, i.e. not on raw input.
2966 *
2967 * parse an XML qualified name
2968 *
2969 * [NS 5] QName ::= (Prefix ':')? LocalPart
2970 *
2971 * [NS 6] Prefix ::= NCName
2972 *
2973 * [NS 7] LocalPart ::= NCName
2974 *
2975 * Returns the local part, and prefix is updated
2976 * to get the Prefix if any.
2977 */
2978
2979xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002980xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002981
2982 static int deprecated = 0;
2983 if (!deprecated) {
2984 xmlGenericError(xmlGenericErrorContext,
2985 "xmlNamespaceParseQName() deprecated function reached\n");
2986 deprecated = 1;
2987 }
2988
2989#if 0
2990 xmlChar *ret = NULL;
2991
2992 *prefix = NULL;
2993 ret = xmlNamespaceParseNCName(ctxt);
2994 if (RAW == ':') {
2995 *prefix = ret;
2996 NEXT;
2997 ret = xmlNamespaceParseNCName(ctxt);
2998 }
2999
3000 return(ret);
3001#endif
3002 return(NULL);
3003}
3004
3005/**
3006 * xmlNamespaceParseNSDef:
3007 * @ctxt: an XML parser context
3008 *
3009 * parse a namespace prefix declaration
3010 *
3011 * TODO: this seems not in use anymore, the namespace handling is done on
3012 * top of the SAX interfaces, i.e. not on raw input.
3013 *
3014 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3015 *
3016 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
3017 *
3018 * Returns the namespace name
3019 */
3020
3021xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003022xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003023 static int deprecated = 0;
3024 if (!deprecated) {
3025 xmlGenericError(xmlGenericErrorContext,
3026 "xmlNamespaceParseNSDef() deprecated function reached\n");
3027 deprecated = 1;
3028 }
3029 return(NULL);
3030#if 0
3031 xmlChar *name = NULL;
3032
3033 if ((RAW == 'x') && (NXT(1) == 'm') &&
3034 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3035 (NXT(4) == 's')) {
3036 SKIP(5);
3037 if (RAW == ':') {
3038 NEXT;
3039 name = xmlNamespaceParseNCName(ctxt);
3040 }
3041 }
3042 return(name);
3043#endif
3044}
3045
3046/**
3047 * xmlParseQuotedString:
3048 * @ctxt: an XML parser context
3049 *
3050 * Parse and return a string between quotes or doublequotes
3051 *
3052 * TODO: Deprecated, to be removed at next drop of binary compatibility
3053 *
3054 * Returns the string parser or NULL.
3055 */
3056xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003057xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003058 static int deprecated = 0;
3059 if (!deprecated) {
3060 xmlGenericError(xmlGenericErrorContext,
3061 "xmlParseQuotedString() deprecated function reached\n");
3062 deprecated = 1;
3063 }
3064 return(NULL);
3065
3066#if 0
3067 xmlChar *buf = NULL;
3068 int len = 0,l;
3069 int size = XML_PARSER_BUFFER_SIZE;
3070 int c;
3071
3072 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3073 if (buf == NULL) {
3074 xmlGenericError(xmlGenericErrorContext,
3075 "malloc of %d byte failed\n", size);
3076 return(NULL);
3077 }
3078xmlGenericError(xmlGenericErrorContext,
3079 "xmlParseQuotedString: reached loop 4\n");
3080 if (RAW == '"') {
3081 NEXT;
3082 c = CUR_CHAR(l);
3083 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3084 if (len + 5 >= size) {
3085 size *= 2;
3086 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3087 if (buf == NULL) {
3088 xmlGenericError(xmlGenericErrorContext,
3089 "realloc of %d byte failed\n", size);
3090 return(NULL);
3091 }
3092 }
3093 COPY_BUF(l,buf,len,c);
3094 NEXTL(l);
3095 c = CUR_CHAR(l);
3096 }
3097 if (c != '"') {
3098 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3100 ctxt->sax->error(ctxt->userData,
3101 "String not closed \"%.50s\"\n", buf);
3102 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003104 } else {
3105 NEXT;
3106 }
3107 } else if (RAW == '\''){
3108 NEXT;
3109 c = CUR;
3110 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3111 if (len + 1 >= size) {
3112 size *= 2;
3113 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3114 if (buf == NULL) {
3115 xmlGenericError(xmlGenericErrorContext,
3116 "realloc of %d byte failed\n", size);
3117 return(NULL);
3118 }
3119 }
3120 buf[len++] = c;
3121 NEXT;
3122 c = CUR;
3123 }
3124 if (RAW != '\'') {
3125 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3127 ctxt->sax->error(ctxt->userData,
3128 "String not closed \"%.50s\"\n", buf);
3129 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003131 } else {
3132 NEXT;
3133 }
3134 }
3135 return(buf);
3136#endif
3137}
3138
3139/**
3140 * xmlParseNamespace:
3141 * @ctxt: an XML parser context
3142 *
3143 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3144 *
3145 * This is what the older xml-name Working Draft specified, a bunch of
3146 * other stuff may still rely on it, so support is still here as
3147 * if it was declared on the root of the Tree:-(
3148 *
3149 * TODO: remove from library
3150 *
3151 * To be removed at next drop of binary compatibility
3152 */
3153
3154void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003155xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003156 static int deprecated = 0;
3157 if (!deprecated) {
3158 xmlGenericError(xmlGenericErrorContext,
3159 "xmlParseNamespace() deprecated function reached\n");
3160 deprecated = 1;
3161 }
3162
3163#if 0
3164 xmlChar *href = NULL;
3165 xmlChar *prefix = NULL;
3166 int garbage = 0;
3167
3168 /*
3169 * We just skipped "namespace" or "xml:namespace"
3170 */
3171 SKIP_BLANKS;
3172
3173xmlGenericError(xmlGenericErrorContext,
3174 "xmlParseNamespace: reached loop 5\n");
3175 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3176 /*
3177 * We can have "ns" or "prefix" attributes
3178 * Old encoding as 'href' or 'AS' attributes is still supported
3179 */
3180 if ((RAW == 'n') && (NXT(1) == 's')) {
3181 garbage = 0;
3182 SKIP(2);
3183 SKIP_BLANKS;
3184
3185 if (RAW != '=') continue;
3186 NEXT;
3187 SKIP_BLANKS;
3188
3189 href = xmlParseQuotedString(ctxt);
3190 SKIP_BLANKS;
3191 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3192 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3193 garbage = 0;
3194 SKIP(4);
3195 SKIP_BLANKS;
3196
3197 if (RAW != '=') continue;
3198 NEXT;
3199 SKIP_BLANKS;
3200
3201 href = xmlParseQuotedString(ctxt);
3202 SKIP_BLANKS;
3203 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3204 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3205 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3206 garbage = 0;
3207 SKIP(6);
3208 SKIP_BLANKS;
3209
3210 if (RAW != '=') continue;
3211 NEXT;
3212 SKIP_BLANKS;
3213
3214 prefix = xmlParseQuotedString(ctxt);
3215 SKIP_BLANKS;
3216 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3217 garbage = 0;
3218 SKIP(2);
3219 SKIP_BLANKS;
3220
3221 if (RAW != '=') continue;
3222 NEXT;
3223 SKIP_BLANKS;
3224
3225 prefix = xmlParseQuotedString(ctxt);
3226 SKIP_BLANKS;
3227 } else if ((RAW == '?') && (NXT(1) == '>')) {
3228 garbage = 0;
3229 NEXT;
3230 } else {
3231 /*
3232 * Found garbage when parsing the namespace
3233 */
3234 if (!garbage) {
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "xmlParseNamespace found garbage\n");
3238 }
3239 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3240 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003241 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003242 NEXT;
3243 }
3244 }
3245
3246 MOVETO_ENDTAG(CUR_PTR);
3247 NEXT;
3248
3249 /*
3250 * Register the DTD.
3251 if (href != NULL)
3252 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3253 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3254 */
3255
3256 if (prefix != NULL) xmlFree(prefix);
3257 if (href != NULL) xmlFree(href);
3258#endif
3259}
3260
3261/**
3262 * xmlScanName:
3263 * @ctxt: an XML parser context
3264 *
3265 * Trickery: parse an XML name but without consuming the input flow
3266 * Needed for rollback cases. Used only when parsing entities references.
3267 *
3268 * TODO: seems deprecated now, only used in the default part of
3269 * xmlParserHandleReference
3270 *
3271 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3272 * CombiningChar | Extender
3273 *
3274 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3275 *
3276 * [6] Names ::= Name (S Name)*
3277 *
3278 * Returns the Name parsed or NULL
3279 */
3280
3281xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003282xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003283 static int deprecated = 0;
3284 if (!deprecated) {
3285 xmlGenericError(xmlGenericErrorContext,
3286 "xmlScanName() deprecated function reached\n");
3287 deprecated = 1;
3288 }
3289 return(NULL);
3290
3291#if 0
3292 xmlChar buf[XML_MAX_NAMELEN];
3293 int len = 0;
3294
3295 GROW;
3296 if (!IS_LETTER(RAW) && (RAW != '_') &&
3297 (RAW != ':')) {
3298 return(NULL);
3299 }
3300
3301
3302 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3303 (NXT(len) == '.') || (NXT(len) == '-') ||
3304 (NXT(len) == '_') || (NXT(len) == ':') ||
3305 (IS_COMBINING(NXT(len))) ||
3306 (IS_EXTENDER(NXT(len)))) {
3307 GROW;
3308 buf[len] = NXT(len);
3309 len++;
3310 if (len >= XML_MAX_NAMELEN) {
3311 xmlGenericError(xmlGenericErrorContext,
3312 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3313 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3314 (IS_DIGIT(NXT(len))) ||
3315 (NXT(len) == '.') || (NXT(len) == '-') ||
3316 (NXT(len) == '_') || (NXT(len) == ':') ||
3317 (IS_COMBINING(NXT(len))) ||
3318 (IS_EXTENDER(NXT(len))))
3319 len++;
3320 break;
3321 }
3322 }
3323 return(xmlStrndup(buf, len));
3324#endif
3325}
3326
3327/**
3328 * xmlParserHandleReference:
3329 * @ctxt: the parser context
3330 *
3331 * TODO: Remove, now deprecated ... the test is done directly in the
3332 * content parsing
3333 * routines.
3334 *
3335 * [67] Reference ::= EntityRef | CharRef
3336 *
3337 * [68] EntityRef ::= '&' Name ';'
3338 *
3339 * [ WFC: Entity Declared ]
3340 * the Name given in the entity reference must match that in an entity
3341 * declaration, except that well-formed documents need not declare any
3342 * of the following entities: amp, lt, gt, apos, quot.
3343 *
3344 * [ WFC: Parsed Entity ]
3345 * An entity reference must not contain the name of an unparsed entity
3346 *
3347 * [66] CharRef ::= '&#' [0-9]+ ';' |
3348 * '&#x' [0-9a-fA-F]+ ';'
3349 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003350 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003351 * the handling is done accordingly to
3352 * http://www.w3.org/TR/REC-xml#entproc
3353 */
3354void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003355xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003356 static int deprecated = 0;
3357 if (!deprecated) {
3358 xmlGenericError(xmlGenericErrorContext,
3359 "xmlParserHandleReference() deprecated function reached\n");
3360 deprecated = 1;
3361 }
3362
Owen Taylor3473f882001-02-23 17:55:21 +00003363 return;
3364}
3365
3366/**
3367 * xmlHandleEntity:
3368 * @ctxt: an XML parser context
3369 * @entity: an XML entity pointer.
3370 *
3371 * Default handling of defined entities, when should we define a new input
3372 * stream ? When do we just handle that as a set of chars ?
3373 *
3374 * OBSOLETE: to be removed at some point.
3375 */
3376
3377void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003378xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003379 static int deprecated = 0;
3380 if (!deprecated) {
3381 xmlGenericError(xmlGenericErrorContext,
3382 "xmlHandleEntity() deprecated function reached\n");
3383 deprecated = 1;
3384 }
3385
3386#if 0
3387 int len;
3388 xmlParserInputPtr input;
3389
3390 if (entity->content == NULL) {
3391 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3393 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3394 entity->name);
3395 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003396 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003397 return;
3398 }
3399 len = xmlStrlen(entity->content);
3400 if (len <= 2) goto handle_as_char;
3401
3402 /*
3403 * Redefine its content as an input stream.
3404 */
3405 input = xmlNewEntityInputStream(ctxt, entity);
3406 xmlPushInput(ctxt, input);
3407 return;
3408
3409handle_as_char:
3410 /*
3411 * Just handle the content as a set of chars.
3412 */
3413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3414 (ctxt->sax->characters != NULL))
3415 ctxt->sax->characters(ctxt->userData, entity->content, len);
3416#endif
3417}
3418
3419/**
3420 * xmlNewGlobalNs:
3421 * @doc: the document carrying the namespace
3422 * @href: the URI associated
3423 * @prefix: the prefix for the namespace
3424 *
3425 * Creation of a Namespace, the old way using PI and without scoping
3426 * DEPRECATED !!!
3427 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003428 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003429 */
3430xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003431xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3432 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003433 static int deprecated = 0;
3434 if (!deprecated) {
3435 xmlGenericError(xmlGenericErrorContext,
3436 "xmlNewGlobalNs() deprecated function reached\n");
3437 deprecated = 1;
3438 }
3439 return(NULL);
3440#if 0
3441 xmlNodePtr root;
3442
3443 xmlNsPtr cur;
3444
3445 root = xmlDocGetRootElement(doc);
3446 if (root != NULL)
3447 return(xmlNewNs(root, href, prefix));
3448
3449 /*
3450 * if there is no root element yet, create an old Namespace type
3451 * and it will be moved to the root at save time.
3452 */
3453 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3454 if (cur == NULL) {
3455 xmlGenericError(xmlGenericErrorContext,
3456 "xmlNewGlobalNs : malloc failed\n");
3457 return(NULL);
3458 }
3459 memset(cur, 0, sizeof(xmlNs));
3460 cur->type = XML_GLOBAL_NAMESPACE;
3461
3462 if (href != NULL)
3463 cur->href = xmlStrdup(href);
3464 if (prefix != NULL)
3465 cur->prefix = xmlStrdup(prefix);
3466
3467 /*
3468 * Add it at the end to preserve parsing order ...
3469 */
3470 if (doc != NULL) {
3471 if (doc->oldNs == NULL) {
3472 doc->oldNs = cur;
3473 } else {
3474 xmlNsPtr prev = doc->oldNs;
3475
3476 while (prev->next != NULL) prev = prev->next;
3477 prev->next = cur;
3478 }
3479 }
3480
3481 return(NULL);
3482#endif
3483}
3484
3485/**
3486 * xmlUpgradeOldNs:
3487 * @doc: a document pointer
3488 *
3489 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3490 * DEPRECATED
3491 */
3492void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003493xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003494 static int deprecated = 0;
3495 if (!deprecated) {
3496 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003497 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003498 deprecated = 1;
3499 }
3500#if 0
3501 xmlNsPtr cur;
3502
3503 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3504 if (doc->children == NULL) {
3505#ifdef DEBUG_TREE
3506 xmlGenericError(xmlGenericErrorContext,
3507 "xmlUpgradeOldNs: failed no root !\n");
3508#endif
3509 return;
3510 }
3511
3512 cur = doc->oldNs;
3513 while (cur->next != NULL) {
3514 cur->type = XML_LOCAL_NAMESPACE;
3515 cur = cur->next;
3516 }
3517 cur->type = XML_LOCAL_NAMESPACE;
3518 cur->next = doc->children->nsDef;
3519 doc->children->nsDef = doc->oldNs;
3520 doc->oldNs = NULL;
3521#endif
3522}
3523