blob: b767ed541eed4b29e10facdd834b2ce18ebf1ac8 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard2fdbd322003-08-18 12:15:38 +000050#include <libxml/dict.h>
Daniel Veillard16698282001-09-14 10:29:27 +000051#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000052#ifdef LIBXML_CATALOG_ENABLED
53#include <libxml/catalog.h>
54#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000055#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000056
Daniel Veillarda53c6882001-07-25 17:18:57 +000057/*
58 * Various global defaults for parsing
59 */
Owen Taylor3473f882001-02-23 17:55:21 +000060
Daniel Veillard5e2dace2001-07-18 19:30:27 +000061/**
Owen Taylor3473f882001-02-23 17:55:21 +000062 * xmlCheckVersion:
63 * @version: the include version number
64 *
65 * check the compiled lib version against the include one.
66 * This can warn or immediately kill the application
67 */
68void
69xmlCheckVersion(int version) {
70 int myversion = (int) LIBXML_VERSION;
71
Daniel Veillard6f350292001-10-14 09:56:15 +000072 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000073
Owen Taylor3473f882001-02-23 17:55:21 +000074 if ((myversion / 10000) != (version / 10000)) {
75 xmlGenericError(xmlGenericErrorContext,
76 "Fatal: program compiled against libxml %d using libxml %d\n",
77 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000078 fprintf(stderr,
79 "Fatal: program compiled against libxml %d using libxml %d\n",
80 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000081 }
82 if ((myversion / 100) < (version / 100)) {
83 xmlGenericError(xmlGenericErrorContext,
84 "Warning: program compiled against libxml %d using older %d\n",
85 (version / 100), (myversion / 100));
86 }
87}
88
89
/*
 * Names of the features that can be enumerated with xmlGetFeaturesList().
 * The strings are static and shared: callers must never free or modify them.
 */
static const char * const xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the capacity of the @result array on input, the number of
 *        names actually stored on output
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * If @len or @result is NULL nothing is copied and only the total
 * number of features is returned, which allows sizing the array.
 *
 * Returns the total number of known features, or -1 if *@len is
 *         negative or greater than or equal to 1000.
 *         *@len is lowered to the total when it exceeds it.
 *         The returned strings are static and must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int count;
    int idx;

    /* pure query mode: report how many features exist */
    if ((len == NULL) || (result == NULL))
        return(total);

    count = *len;
    if ((count < 0) || (count >= 1000))
        return(-1);

    if (count > total) {
        count = total;
        *len = total;
    }
    for (idx = 0; idx < count; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
161
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162/**
Owen Taylor3473f882001-02-23 17:55:21 +0000163 * xmlGetFeature:
164 * @ctxt: an XML/HTML parser context
165 * @name: the feature name
166 * @result: location to store the result
167 *
168 * Read the current value of one feature of this parser instance
169 *
170 * Returns -1 in case or error, 0 otherwise
171 */
172int
173xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
174 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
175 return(-1);
176
177 if (!strcmp(name, "validate")) {
178 *((int *) result) = ctxt->validate;
179 } else if (!strcmp(name, "keep blanks")) {
180 *((int *) result) = ctxt->keepBlanks;
181 } else if (!strcmp(name, "disable SAX")) {
182 *((int *) result) = ctxt->disableSAX;
183 } else if (!strcmp(name, "fetch external entities")) {
184 *((int *) result) = ctxt->loadsubset;
185 } else if (!strcmp(name, "substitute entities")) {
186 *((int *) result) = ctxt->replaceEntities;
187 } else if (!strcmp(name, "gather line info")) {
188 *((int *) result) = ctxt->record_info;
189 } else if (!strcmp(name, "user data")) {
190 *((void **)result) = ctxt->userData;
191 } else if (!strcmp(name, "is html")) {
192 *((int *) result) = ctxt->html;
193 } else if (!strcmp(name, "is standalone")) {
194 *((int *) result) = ctxt->standalone;
195 } else if (!strcmp(name, "document")) {
196 *((xmlDocPtr *) result) = ctxt->myDoc;
197 } else if (!strcmp(name, "is well formed")) {
198 *((int *) result) = ctxt->wellFormed;
199 } else if (!strcmp(name, "is valid")) {
200 *((int *) result) = ctxt->valid;
201 } else if (!strcmp(name, "SAX block")) {
202 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
203 } else if (!strcmp(name, "SAX function internalSubset")) {
204 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
205 } else if (!strcmp(name, "SAX function isStandalone")) {
206 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
207 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
208 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
209 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
210 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
211 } else if (!strcmp(name, "SAX function resolveEntity")) {
212 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
213 } else if (!strcmp(name, "SAX function getEntity")) {
214 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
215 } else if (!strcmp(name, "SAX function entityDecl")) {
216 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
217 } else if (!strcmp(name, "SAX function notationDecl")) {
218 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
219 } else if (!strcmp(name, "SAX function attributeDecl")) {
220 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
221 } else if (!strcmp(name, "SAX function elementDecl")) {
222 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
223 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
224 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
225 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
226 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
227 } else if (!strcmp(name, "SAX function startDocument")) {
228 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
229 } else if (!strcmp(name, "SAX function endDocument")) {
230 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
231 } else if (!strcmp(name, "SAX function startElement")) {
232 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
233 } else if (!strcmp(name, "SAX function endElement")) {
234 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
235 } else if (!strcmp(name, "SAX function reference")) {
236 *((referenceSAXFunc *) result) = ctxt->sax->reference;
237 } else if (!strcmp(name, "SAX function characters")) {
238 *((charactersSAXFunc *) result) = ctxt->sax->characters;
239 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
240 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
241 } else if (!strcmp(name, "SAX function processingInstruction")) {
242 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
243 } else if (!strcmp(name, "SAX function comment")) {
244 *((commentSAXFunc *) result) = ctxt->sax->comment;
245 } else if (!strcmp(name, "SAX function warning")) {
246 *((warningSAXFunc *) result) = ctxt->sax->warning;
247 } else if (!strcmp(name, "SAX function error")) {
248 *((errorSAXFunc *) result) = ctxt->sax->error;
249 } else if (!strcmp(name, "SAX function fatalError")) {
250 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
251 } else if (!strcmp(name, "SAX function getParameterEntity")) {
252 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
253 } else if (!strcmp(name, "SAX function cdataBlock")) {
254 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
255 } else if (!strcmp(name, "SAX function externalSubset")) {
256 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
257 } else {
258 return(-1);
259 }
260 return(0);
261}
262
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000263/**
Owen Taylor3473f882001-02-23 17:55:21 +0000264 * xmlSetFeature:
265 * @ctxt: an XML/HTML parser context
266 * @name: the feature name
267 * @value: pointer to the location of the new value
268 *
269 * Change the current value of one feature of this parser instance
270 *
271 * Returns -1 in case or error, 0 otherwise
272 */
273int
274xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
275 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
276 return(-1);
277
278 if (!strcmp(name, "validate")) {
279 int newvalidate = *((int *) value);
280 if ((!ctxt->validate) && (newvalidate != 0)) {
281 if (ctxt->vctxt.warning == NULL)
282 ctxt->vctxt.warning = xmlParserValidityWarning;
283 if (ctxt->vctxt.error == NULL)
284 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000285 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000286 }
287 ctxt->validate = newvalidate;
288 } else if (!strcmp(name, "keep blanks")) {
289 ctxt->keepBlanks = *((int *) value);
290 } else if (!strcmp(name, "disable SAX")) {
291 ctxt->disableSAX = *((int *) value);
292 } else if (!strcmp(name, "fetch external entities")) {
293 ctxt->loadsubset = *((int *) value);
294 } else if (!strcmp(name, "substitute entities")) {
295 ctxt->replaceEntities = *((int *) value);
296 } else if (!strcmp(name, "gather line info")) {
297 ctxt->record_info = *((int *) value);
298 } else if (!strcmp(name, "user data")) {
299 ctxt->userData = *((void **)value);
300 } else if (!strcmp(name, "is html")) {
301 ctxt->html = *((int *) value);
302 } else if (!strcmp(name, "is standalone")) {
303 ctxt->standalone = *((int *) value);
304 } else if (!strcmp(name, "document")) {
305 ctxt->myDoc = *((xmlDocPtr *) value);
306 } else if (!strcmp(name, "is well formed")) {
307 ctxt->wellFormed = *((int *) value);
308 } else if (!strcmp(name, "is valid")) {
309 ctxt->valid = *((int *) value);
310 } else if (!strcmp(name, "SAX block")) {
311 ctxt->sax = *((xmlSAXHandlerPtr *) value);
312 } else if (!strcmp(name, "SAX function internalSubset")) {
313 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
314 } else if (!strcmp(name, "SAX function isStandalone")) {
315 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
317 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
319 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function resolveEntity")) {
321 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
322 } else if (!strcmp(name, "SAX function getEntity")) {
323 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
324 } else if (!strcmp(name, "SAX function entityDecl")) {
325 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function notationDecl")) {
327 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function attributeDecl")) {
329 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function elementDecl")) {
331 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
333 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
335 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function startDocument")) {
337 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function endDocument")) {
339 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function startElement")) {
341 ctxt->sax->startElement = *((startElementSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function endElement")) {
343 ctxt->sax->endElement = *((endElementSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function reference")) {
345 ctxt->sax->reference = *((referenceSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function characters")) {
347 ctxt->sax->characters = *((charactersSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
349 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function processingInstruction")) {
351 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function comment")) {
353 ctxt->sax->comment = *((commentSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function warning")) {
355 ctxt->sax->warning = *((warningSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function error")) {
357 ctxt->sax->error = *((errorSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function fatalError")) {
359 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function getParameterEntity")) {
361 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
362 } else if (!strcmp(name, "SAX function cdataBlock")) {
363 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function externalSubset")) {
365 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
366 } else {
367 return(-1);
368 }
369 return(0);
370}
371
372/************************************************************************
373 * *
374 * Some functions to avoid too large macros *
375 * *
376 ************************************************************************/
377
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * that is any Unicode character, excluding the surrogate blocks,
 * FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 1 if @c matches the production, 0 otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF are excluded, the supplementary planes are allowed */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
398
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is one of the white space characters
 * of the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 1 if @c is a blank, 0 otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* horizontal tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
413
/*
 * BaseChar membership for the code points 0x00 - 0xFF, indexed directly
 * by the code point (1 = BaseChar, 0 = not a BaseChar).
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered from the xmlBaseArray lookup
 * table; the others are searched in a table of inclusive ranges.
 * Negative values are never a BaseChar (and must not be used as an
 * index in the lookup table).
 *
 * Returns 1 if @c is a BaseChar, 0 otherwise
 */
int
xmlIsBaseChar(int c) {
    /*
     * Inclusive [first, last] ranges of production [85] from 0x0100 up.
     * The binary search below requires the table to stay sorted in
     * ascending order with non-overlapping entries.
     */
    static const int baseRanges[][2] = {
        { 0x0100, 0x0131 }, { 0x0134, 0x013E }, { 0x0141, 0x0148 },
        { 0x014A, 0x017E }, { 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 },
        { 0x01F4, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
        { 0x02BB, 0x02C1 }, { 0x0386, 0x0386 }, { 0x0388, 0x038A },
        { 0x038C, 0x038C }, { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE },
        { 0x03D0, 0x03D6 }, { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC },
        { 0x03DE, 0x03DE }, { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
        { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
        { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
        { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
        { 0x04F8, 0x04F9 }, { 0x0531, 0x0556 }, { 0x0559, 0x0559 },
        { 0x0561, 0x0586 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
        { 0x0621, 0x063A }, { 0x0641, 0x064A }, { 0x0671, 0x06B7 },
        { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 },
        { 0x06D5, 0x06D5 }, { 0x06E5, 0x06E6 },

        { 0x0905, 0x0939 }, { 0x093D, 0x093D }, { 0x0958, 0x0961 },
        { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
        { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
        { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
        { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
        { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
        { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
        { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
        { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
        { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0ABD },
        { 0x0AE0, 0x0AE0 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
        { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
        { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D }, { 0x0B5C, 0x0B5D },
        { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
        { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
        { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
        { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0C05, 0x0C0C },
        { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 },
        { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 }, { 0x0C85, 0x0C8C },
        { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 },
        { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
        { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
        { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 }, { 0x0E01, 0x0E2E },
        { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E45 },
        { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
        { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
        { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
        { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
        { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 }, { 0x0EBD, 0x0EBD },
        { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 }, { 0x0F49, 0x0F69 },

        { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, { 0x1100, 0x1100 },
        { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109, 0x1109 },
        { 0x110B, 0x110C }, { 0x110E, 0x1112 }, { 0x113C, 0x113C },
        { 0x113E, 0x113E }, { 0x1140, 0x1140 }, { 0x114C, 0x114C },
        { 0x114E, 0x114E }, { 0x1150, 0x1150 }, { 0x1154, 0x1155 },
        { 0x1159, 0x1159 }, { 0x115F, 0x1161 }, { 0x1163, 0x1163 },
        { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 },
        { 0x116D, 0x116E }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 },
        { 0x119E, 0x119E }, { 0x11A8, 0x11A8 }, { 0x11AB, 0x11AB },
        { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 }, { 0x11BA, 0x11BA },
        { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB }, { 0x11F0, 0x11F0 },
        { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
        { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
        { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
        { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
        { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FBE, 0x1FBE },
        { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
        { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
        { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 }, { 0x212A, 0x212B },
        { 0x212E, 0x212E }, { 0x2180, 0x2182 }, { 0x3041, 0x3094 },
        { 0x30A1, 0x30FA }, { 0x3105, 0x312C }, { 0xAC00, 0xD7A3 }
    };
    int low;
    int high;

    /* a negative value would index before the start of xmlBaseArray */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(baseRanges) / sizeof(baseRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < baseRanges[mid][0])
            high = mid - 1;
        else if (c > baseRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
652
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 1 if @c is a Digit, 0 otherwise
 */
int
xmlIsDigit(int c) {
    /*
     * Inclusive [first, last] ranges of production [88]; the table is
     * kept sorted in ascending order for the binary search below.
     */
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 }
    };
    int low = 0;
    int high = (int) (sizeof(digitRanges) / sizeof(digitRanges[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < digitRanges[mid][0])
            high = mid - 1;
        else if (c > digitRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
682
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 1 if @c is a CombiningChar, 0 otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * Inclusive [first, last] ranges of production [87]; the table is
     * kept sorted in ascending order for the binary search below.
     */
    static const int combiningRanges[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },

        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },

        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },

        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    int low = 0;
    int high =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < combiningRanges[mid][0])
            high = mid - 1;
        else if (c > combiningRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
795
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if @c is not an Extender, non-zero (1) otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if (((c >= 0x3031) && (c <= 0x3035)) ||
        ((c >= 0x309D) && (c <= 0x309E)) ||
        ((c >= 0x30FC) && (c <= 0x30FE)))
        return(1);

    /* the isolated code points of the production */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
        (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
        (c == 0x0EC6) || (c == 0x3005))
        return(1);

    return(0);
}
820
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * The range [#xF900-#xFA2D] (CJK compatibility ideographs) used to be
 * accepted here as well; it is not part of production [86] and is
 * therefore rejected.
 *
 * NOTE(review): xmlIsLetter() goes through the IS_IDEOGRAPHIC macro,
 * which is defined outside this file; confirm that the macro matches
 * production [86] too.
 *
 * Returns 0 if @c is not Ideographic, non-zero (1) otherwise
 */
int
xmlIsIdeographic(int c) {
    /* cheap rejection of Latin-1 (and negative values) */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
838
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [84] Letter ::= BaseChar | Ideographic
 * using the IS_BASECHAR and IS_IDEOGRAPHIC macros.
 *
 * Returns 0 if @c is not a Letter, non-zero (1) otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
852
/**
 * xmlIsPubidChar:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if @c is not a PubidChar, non-zero (1) otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* white space and the punctuation set of the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
875
876/************************************************************************
877 * *
878 * Input handling functions for progressive parsing *
879 * *
880 ************************************************************************/
881
882/* #define DEBUG_INPUT */
883/* #define DEBUG_STACK */
884/* #define DEBUG_PUSH */
885
886
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to check
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined. Reports
 * through xmlGenericError() when in->base does not match the buffer
 * content or when in->cur lies outside [base, base + use], then dumps
 * the pointers and counters of the input.
 *
 * Pointers are printed with %p: casting them to int and printing with
 * %x truncates them on platforms where pointers are wider than int.
 * The offset and the counters are cast to int so that they match %d.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
915
916
917/**
918 * xmlParserInputRead:
919 * @in: an XML parser input
920 * @len: an indicative size for the lookahead
921 *
922 * This function refresh the input for the parser. It doesn't try to
923 * preserve pointers to the input buffer, and discard already read data
924 *
925 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
926 * end of this entity
927 */
928int
929xmlParserInputRead(xmlParserInputPtr in, int len) {
930 int ret;
931 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000932 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000933
934#ifdef DEBUG_INPUT
935 xmlGenericError(xmlGenericErrorContext, "Read\n");
936#endif
937 if (in->buf == NULL) return(-1);
938 if (in->base == NULL) return(-1);
939 if (in->cur == NULL) return(-1);
940 if (in->buf->buffer == NULL) return(-1);
941 if (in->buf->readcallback == NULL) return(-1);
942
943 CHECK_BUFFER(in);
944
945 used = in->cur - in->buf->buffer->content;
946 ret = xmlBufferShrink(in->buf->buffer, used);
947 if (ret > 0) {
948 in->cur -= ret;
949 in->consumed += ret;
950 }
951 ret = xmlParserInputBufferRead(in->buf, len);
952 if (in->base != in->buf->buffer->content) {
953 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000954 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000955 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000956 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000957 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000959 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000960 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000961
962 CHECK_BUFFER(in);
963
964 return(ret);
965}
966
967/**
968 * xmlParserInputGrow:
969 * @in: an XML parser input
970 * @len: an indicative size for the lookahead
971 *
972 * This function increase the input for the parser. It tries to
973 * preserve pointers to the input buffer, and keep already read data
974 *
975 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
976 * end of this entity
977 */
978int
979xmlParserInputGrow(xmlParserInputPtr in, int len) {
980 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000981 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000982
983#ifdef DEBUG_INPUT
984 xmlGenericError(xmlGenericErrorContext, "Grow\n");
985#endif
986 if (in->buf == NULL) return(-1);
987 if (in->base == NULL) return(-1);
988 if (in->cur == NULL) return(-1);
989 if (in->buf->buffer == NULL) return(-1);
990
991 CHECK_BUFFER(in);
992
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000993 indx = in->cur - in->base;
994 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000995
996 CHECK_BUFFER(in);
997
998 return(0);
999 }
1000 if (in->buf->readcallback != NULL)
1001 ret = xmlParserInputBufferGrow(in->buf, len);
1002 else
1003 return(0);
1004
1005 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001006 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001007 * block, but we use it really as an integer to do some
1008 * pointer arithmetic. Insure will raise it as a bug but in
1009 * that specific case, that's not !
1010 */
1011 if (in->base != in->buf->buffer->content) {
1012 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001013 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001014 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001015 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001016 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001018 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001019 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001020
1021 CHECK_BUFFER(in);
1022
1023 return(ret);
1024}
1025
1026/**
1027 * xmlParserInputShrink:
1028 * @in: an XML parser input
1029 *
1030 * This function removes used input for the parser.
1031 */
1032void
1033xmlParserInputShrink(xmlParserInputPtr in) {
1034 int used;
1035 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001036 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001037
1038#ifdef DEBUG_INPUT
1039 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1040#endif
1041 if (in->buf == NULL) return;
1042 if (in->base == NULL) return;
1043 if (in->cur == NULL) return;
1044 if (in->buf->buffer == NULL) return;
1045
1046 CHECK_BUFFER(in);
1047
1048 used = in->cur - in->buf->buffer->content;
1049 /*
1050 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001051 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001052 */
Owen Taylor3473f882001-02-23 17:55:21 +00001053 if (used > INPUT_CHUNK) {
1054 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1055 if (ret > 0) {
1056 in->cur -= ret;
1057 in->consumed += ret;
1058 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001059 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001060 }
1061
1062 CHECK_BUFFER(in);
1063
1064 if (in->buf->buffer->use > INPUT_CHUNK) {
1065 return;
1066 }
1067 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1068 if (in->base != in->buf->buffer->content) {
1069 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001070 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001071 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001072 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001073 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001074 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001075 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001076 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001077
1078 CHECK_BUFFER(in);
1079}
1080
1081/************************************************************************
1082 * *
1083 * UTF8 character input and related functions *
1084 * *
1085 ************************************************************************/
1086
1087/**
1088 * xmlNextChar:
1089 * @ctxt: the XML parser context
1090 *
1091 * Skip to the next char input char.
1092 */
1093
1094void
Daniel Veillard77a90a72003-03-22 00:04:05 +00001095xmlNextChar(xmlParserCtxtPtr ctxt)
1096{
Owen Taylor3473f882001-02-23 17:55:21 +00001097 if (ctxt->instate == XML_PARSER_EOF)
Daniel Veillard77a90a72003-03-22 00:04:05 +00001098 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001099
Daniel Veillardfdc91562002-07-01 21:52:03 +00001100 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001101 if ((*ctxt->input->cur == 0) &&
1102 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1103 (ctxt->instate != XML_PARSER_COMMENT)) {
1104 /*
1105 * If we are at the end of the current entity and
1106 * the context allows it, we pop consumed entities
1107 * automatically.
1108 * the auto closing should be blocked in other cases
1109 */
1110 xmlPopInput(ctxt);
1111 } else {
1112 const unsigned char *cur;
1113 unsigned char c;
Owen Taylor3473f882001-02-23 17:55:21 +00001114
Daniel Veillard77a90a72003-03-22 00:04:05 +00001115 /*
1116 * 2.11 End-of-Line Handling
1117 * the literal two-character sequence "#xD#xA" or a standalone
1118 * literal #xD, an XML processor must pass to the application
1119 * the single character #xA.
1120 */
1121 if (*(ctxt->input->cur) == '\n') {
1122 ctxt->input->line++;
1123 ctxt->input->col = 1;
1124 } else
1125 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001126
Daniel Veillard77a90a72003-03-22 00:04:05 +00001127 /*
1128 * We are supposed to handle UTF8, check it's valid
1129 * From rfc2044: encoding of the Unicode values on UTF-8:
1130 *
1131 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1132 * 0000 0000-0000 007F 0xxxxxxx
1133 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1134 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1135 *
1136 * Check for the 0x110000 limit too
1137 */
1138 cur = ctxt->input->cur;
1139
1140 c = *cur;
1141 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001142 if (c == 0xC0)
1143 goto encoding_error;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001144 if (cur[1] == 0)
1145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1146 if ((cur[1] & 0xc0) != 0x80)
1147 goto encoding_error;
1148 if ((c & 0xe0) == 0xe0) {
1149 unsigned int val;
1150
1151 if (cur[2] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if ((cur[2] & 0xc0) != 0x80)
1154 goto encoding_error;
1155 if ((c & 0xf0) == 0xf0) {
1156 if (cur[3] == 0)
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if (((c & 0xf8) != 0xf0) ||
1159 ((cur[3] & 0xc0) != 0x80))
1160 goto encoding_error;
1161 /* 4-byte code */
1162 ctxt->input->cur += 4;
1163 val = (cur[0] & 0x7) << 18;
1164 val |= (cur[1] & 0x3f) << 12;
1165 val |= (cur[2] & 0x3f) << 6;
1166 val |= cur[3] & 0x3f;
1167 } else {
1168 /* 3-byte code */
1169 ctxt->input->cur += 3;
1170 val = (cur[0] & 0xf) << 12;
1171 val |= (cur[1] & 0x3f) << 6;
1172 val |= cur[2] & 0x3f;
1173 }
1174 if (((val > 0xd7ff) && (val < 0xe000)) ||
1175 ((val > 0xfffd) && (val < 0x10000)) ||
1176 (val >= 0x110000)) {
1177 if ((ctxt->sax != NULL) &&
1178 (ctxt->sax->error != NULL))
1179 ctxt->sax->error(ctxt->userData,
1180 "Char 0x%X out of allowed range\n",
1181 val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 if (ctxt->recovery == 0)
1185 ctxt->disableSAX = 1;
1186 }
1187 } else
1188 /* 2-byte code */
1189 ctxt->input->cur += 2;
1190 } else
1191 /* 1-byte code */
1192 ctxt->input->cur++;
1193
1194 ctxt->nbChars++;
1195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1197 }
Owen Taylor3473f882001-02-23 17:55:21 +00001198 } else {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001199 /*
1200 * Assume it's a fixed length encoding (1) with
1201 * a compatible encoding for the ASCII set, since
1202 * XML constructs only use < 128 chars
1203 */
1204
1205 if (*(ctxt->input->cur) == '\n') {
1206 ctxt->input->line++;
1207 ctxt->input->col = 1;
1208 } else
1209 ctxt->input->col++;
1210 ctxt->input->cur++;
1211 ctxt->nbChars++;
1212 if (*ctxt->input->cur == 0)
1213 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001214 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001215 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001216 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001217 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001218 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001219 xmlPopInput(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001220 return;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001221 encoding_error:
Owen Taylor3473f882001-02-23 17:55:21 +00001222 /*
1223 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001224 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001225 * declaration header. Report the error and switch the encoding
1226 * to ISO-Latin-1 (if you don't like this policy, just declare the
1227 * encoding !)
1228 */
1229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001230 ctxt->sax->error(ctxt->userData,
1231 "Input is not proper UTF-8, indicate encoding !\n");
1232 ctxt->sax->error(ctxt->userData,
1233 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1234 ctxt->input->cur[0], ctxt->input->cur[1],
1235 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001236 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001237 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001238 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1239
Daniel Veillard77a90a72003-03-22 00:04:05 +00001240 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001241 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001242 return;
1243}
1244
1245/**
1246 * xmlCurrentChar:
1247 * @ctxt: the XML parser context
1248 * @len: pointer to the length of the char read
1249 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001250 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001251 * bytes in the input buffer. Implement the end of line normalization:
1252 * 2.11 End-of-Line Handling
1253 * Wherever an external parsed entity or the literal entity value
1254 * of an internal parsed entity contains either the literal two-character
1255 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1256 * must pass to the application the single character #xA.
1257 * This behavior can conveniently be produced by normalizing all
1258 * line breaks to #xA on input, before parsing.)
1259 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001260 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001261 */
1262
1263int
1264xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1265 if (ctxt->instate == XML_PARSER_EOF)
1266 return(0);
1267
Daniel Veillard561b7f82002-03-20 21:55:57 +00001268 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1269 *len = 1;
1270 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001271 }
1272 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1273 /*
1274 * We are supposed to handle UTF8, check it's valid
1275 * From rfc2044: encoding of the Unicode values on UTF-8:
1276 *
1277 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1278 * 0000 0000-0000 007F 0xxxxxxx
1279 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1280 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1281 *
1282 * Check for the 0x110000 limit too
1283 */
1284 const unsigned char *cur = ctxt->input->cur;
1285 unsigned char c;
1286 unsigned int val;
1287
1288 c = *cur;
1289 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001290 if (c == 0xC0)
1291 goto encoding_error;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001292 if (cur[1] == 0)
1293 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1294 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001295 goto encoding_error;
1296 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001297
1298 if (cur[2] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001301 goto encoding_error;
1302 if ((c & 0xf0) == 0xf0) {
1303 if (cur[3] == 0)
1304 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001305 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001306 ((cur[3] & 0xc0) != 0x80))
1307 goto encoding_error;
1308 /* 4-byte code */
1309 *len = 4;
1310 val = (cur[0] & 0x7) << 18;
1311 val |= (cur[1] & 0x3f) << 12;
1312 val |= (cur[2] & 0x3f) << 6;
1313 val |= cur[3] & 0x3f;
1314 } else {
1315 /* 3-byte code */
1316 *len = 3;
1317 val = (cur[0] & 0xf) << 12;
1318 val |= (cur[1] & 0x3f) << 6;
1319 val |= cur[2] & 0x3f;
1320 }
1321 } else {
1322 /* 2-byte code */
1323 *len = 2;
1324 val = (cur[0] & 0x1f) << 6;
1325 val |= cur[1] & 0x3f;
1326 }
1327 if (!IS_CHAR(val)) {
1328 if ((ctxt->sax != NULL) &&
1329 (ctxt->sax->error != NULL))
1330 ctxt->sax->error(ctxt->userData,
1331 "Char 0x%X out of allowed range\n", val);
1332 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001335 }
1336 return(val);
1337 } else {
1338 /* 1-byte code */
1339 *len = 1;
1340 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001341 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001342 ctxt->nbChars++;
1343 ctxt->input->cur++;
1344 }
1345 return(0xA);
1346 }
1347 return((int) *ctxt->input->cur);
1348 }
1349 }
1350 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001351 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001352 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001353 * XML constructs only use < 128 chars
1354 */
1355 *len = 1;
1356 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001357 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001358 ctxt->nbChars++;
1359 ctxt->input->cur++;
1360 }
1361 return(0xA);
1362 }
1363 return((int) *ctxt->input->cur);
1364encoding_error:
1365 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001366 * An encoding problem may arise from a truncated input buffer
1367 * splitting a character in the middle. In that case do not raise
1368 * an error but return 0 to endicate an end of stream problem
1369 */
1370 if (ctxt->input->end - ctxt->input->cur < 4) {
1371 *len = 0;
1372 return(0);
1373 }
1374
1375 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001376 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001377 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001378 * declaration header. Report the error and switch the encoding
1379 * to ISO-Latin-1 (if you don't like this policy, just declare the
1380 * encoding !)
1381 */
1382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1383 ctxt->sax->error(ctxt->userData,
1384 "Input is not proper UTF-8, indicate encoding !\n");
1385 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001386 ctxt->input->cur[0], ctxt->input->cur[1],
1387 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001388 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001389 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001390 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1391
1392 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1393 *len = 1;
1394 return((int) *ctxt->input->cur);
1395}
1396
1397/**
1398 * xmlStringCurrentChar:
1399 * @ctxt: the XML parser context
1400 * @cur: pointer to the beginning of the char
1401 * @len: pointer to the length of the char read
1402 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001403 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001404 * bytes in the input buffer.
1405 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001406 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001407 */
1408
1409int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001410xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1411{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001412 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001413 /*
1414 * We are supposed to handle UTF8, check it's valid
1415 * From rfc2044: encoding of the Unicode values on UTF-8:
1416 *
1417 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1418 * 0000 0000-0000 007F 0xxxxxxx
1419 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1420 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1421 *
1422 * Check for the 0x110000 limit too
1423 */
1424 unsigned char c;
1425 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001426
Daniel Veillardd8224e02002-01-13 15:43:22 +00001427 c = *cur;
1428 if (c & 0x80) {
1429 if ((cur[1] & 0xc0) != 0x80)
1430 goto encoding_error;
1431 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001432
Daniel Veillardd8224e02002-01-13 15:43:22 +00001433 if ((cur[2] & 0xc0) != 0x80)
1434 goto encoding_error;
1435 if ((c & 0xf0) == 0xf0) {
1436 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1437 goto encoding_error;
1438 /* 4-byte code */
1439 *len = 4;
1440 val = (cur[0] & 0x7) << 18;
1441 val |= (cur[1] & 0x3f) << 12;
1442 val |= (cur[2] & 0x3f) << 6;
1443 val |= cur[3] & 0x3f;
1444 } else {
1445 /* 3-byte code */
1446 *len = 3;
1447 val = (cur[0] & 0xf) << 12;
1448 val |= (cur[1] & 0x3f) << 6;
1449 val |= cur[2] & 0x3f;
1450 }
1451 } else {
1452 /* 2-byte code */
1453 *len = 2;
1454 val = (cur[0] & 0x1f) << 6;
1455 val |= cur[1] & 0x3f;
1456 }
1457 if (!IS_CHAR(val)) {
1458 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1459 (ctxt->sax->error != NULL))
1460 ctxt->sax->error(ctxt->userData,
1461 "Char 0x%X out of allowed range\n",
1462 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001463 if (ctxt != NULL) {
1464 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1465 ctxt->wellFormed = 0;
1466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1467 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001468 }
1469 return (val);
1470 } else {
1471 /* 1-byte code */
1472 *len = 1;
1473 return ((int) *cur);
1474 }
Owen Taylor3473f882001-02-23 17:55:21 +00001475 }
1476 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001477 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001478 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * XML constructs only use < 128 chars
1480 */
1481 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001482 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001483encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001484
Owen Taylor3473f882001-02-23 17:55:21 +00001485 /*
1486 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001487 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001488 * declaration header. Report the error and switch the encoding
1489 * to ISO-Latin-1 (if you don't like this policy, just declare the
1490 * encoding !)
1491 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001492 if (ctxt != NULL) {
1493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1494 ctxt->sax->error(ctxt->userData,
1495 "Input is not proper UTF-8, indicate encoding !\n");
1496 ctxt->sax->error(ctxt->userData,
1497 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1498 ctxt->input->cur[0], ctxt->input->cur[1],
1499 ctxt->input->cur[2], ctxt->input->cur[3]);
1500 }
1501 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001502 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
Owen Taylor3473f882001-02-23 17:55:21 +00001504
1505 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001506 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001507}
1508
1509/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001510 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001511 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001512 * @val: the char value
1513 *
1514 * append the char value in the array
1515 *
1516 * Returns the number of xmlChar written
1517 */
Owen Taylor3473f882001-02-23 17:55:21 +00001518int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001519xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001520 /*
1521 * We are supposed to handle UTF8, check it's valid
1522 * From rfc2044: encoding of the Unicode values on UTF-8:
1523 *
1524 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1525 * 0000 0000-0000 007F 0xxxxxxx
1526 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1527 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1528 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001529 if (val >= 0x80) {
1530 xmlChar *savedout = out;
1531 int bits;
1532 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1533 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1534 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1535 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001536 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001537 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001538 val);
1539 return(0);
1540 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001541 for ( ; bits >= 0; bits-= 6)
1542 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1543 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001544 }
1545 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001546 return 1;
1547}
1548
1549/**
1550 * xmlCopyChar:
1551 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001552 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001553 * @val: the char value
1554 *
1555 * append the char value in the array
1556 *
1557 * Returns the number of xmlChar written
1558 */
1559
1560int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001561xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001562 /* the len parameter is ignored */
1563 if (val >= 0x80) {
1564 return(xmlCopyCharMultiByte (out, val));
1565 }
1566 *out = (xmlChar) val;
1567 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001568}
1569
1570/************************************************************************
1571 * *
1572 * Commodity functions to switch encodings *
1573 * *
1574 ************************************************************************/
1575
1576/**
1577 * xmlSwitchEncoding:
1578 * @ctxt: the parser context
1579 * @enc: the encoding value (number)
1580 *
1581 * change the input functions when discovering the character encoding
1582 * of a given entity.
1583 *
1584 * Returns 0 in case of success, -1 otherwise
1585 */
1586int
1587xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1588{
1589 xmlCharEncodingHandlerPtr handler;
1590
1591 switch (enc) {
1592 case XML_CHAR_ENCODING_ERROR:
1593 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1596 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001597 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001598 break;
1599 case XML_CHAR_ENCODING_NONE:
1600 /* let's assume it's UTF-8 without the XML decl */
1601 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1602 return(0);
1603 case XML_CHAR_ENCODING_UTF8:
1604 /* default encoding, no conversion should be needed */
1605 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001606
1607 /*
1608 * Errata on XML-1.0 June 20 2001
1609 * Specific handling of the Byte Order Mark for
1610 * UTF-8
1611 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001612 if ((ctxt->input != NULL) &&
1613 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001614 (ctxt->input->cur[1] == 0xBB) &&
1615 (ctxt->input->cur[2] == 0xBF)) {
1616 ctxt->input->cur += 3;
1617 }
Owen Taylor3473f882001-02-23 17:55:21 +00001618 return(0);
Daniel Veillard2dcb9372003-07-16 21:18:19 +00001619 case XML_CHAR_ENCODING_UTF16LE:
1620 case XML_CHAR_ENCODING_UTF16BE:
1621 /*The raw input characters are encoded
1622 *in UTF-16. As we expect this function
1623 *to be called after xmlCharEncInFunc, we expect
1624 *ctxt->input->cur to contain UTF-8 encoded characters.
1625 *So the raw UTF16 Byte Order Mark
1626 *has also been converted into
1627 *an UTF-8 BOM. Let's skip that BOM.
1628 */
1629 if ((ctxt->input != NULL) &&
1630 (ctxt->input->cur[0] == 0xEF) &&
1631 (ctxt->input->cur[1] == 0xBB) &&
1632 (ctxt->input->cur[2] == 0xBF)) {
1633 ctxt->input->cur += 3;
1634 }
1635 break ;
Owen Taylor3473f882001-02-23 17:55:21 +00001636 default:
1637 break;
1638 }
1639 handler = xmlGetCharEncodingHandler(enc);
1640 if (handler == NULL) {
1641 /*
1642 * Default handlers.
1643 */
1644 switch (enc) {
1645 case XML_CHAR_ENCODING_ERROR:
1646 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1649 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001650 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001651 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1652 break;
1653 case XML_CHAR_ENCODING_NONE:
1654 /* let's assume it's UTF-8 without the XML decl */
1655 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1656 return(0);
1657 case XML_CHAR_ENCODING_UTF8:
1658 case XML_CHAR_ENCODING_ASCII:
1659 /* default encoding, no conversion should be needed */
1660 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1661 return(0);
1662 case XML_CHAR_ENCODING_UTF16LE:
1663 break;
1664 case XML_CHAR_ENCODING_UTF16BE:
1665 break;
1666 case XML_CHAR_ENCODING_UCS4LE:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding USC4 little endian not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_UCS4BE:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding USC4 big endian not supported\n");
1677 break;
1678 case XML_CHAR_ENCODING_EBCDIC:
1679 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1681 ctxt->sax->error(ctxt->userData,
1682 "char encoding EBCDIC not supported\n");
1683 break;
1684 case XML_CHAR_ENCODING_UCS4_2143:
1685 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1687 ctxt->sax->error(ctxt->userData,
1688 "char encoding UCS4 2143 not supported\n");
1689 break;
1690 case XML_CHAR_ENCODING_UCS4_3412:
1691 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1693 ctxt->sax->error(ctxt->userData,
1694 "char encoding UCS4 3412 not supported\n");
1695 break;
1696 case XML_CHAR_ENCODING_UCS2:
1697 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1699 ctxt->sax->error(ctxt->userData,
1700 "char encoding UCS2 not supported\n");
1701 break;
1702 case XML_CHAR_ENCODING_8859_1:
1703 case XML_CHAR_ENCODING_8859_2:
1704 case XML_CHAR_ENCODING_8859_3:
1705 case XML_CHAR_ENCODING_8859_4:
1706 case XML_CHAR_ENCODING_8859_5:
1707 case XML_CHAR_ENCODING_8859_6:
1708 case XML_CHAR_ENCODING_8859_7:
1709 case XML_CHAR_ENCODING_8859_8:
1710 case XML_CHAR_ENCODING_8859_9:
1711 /*
1712 * We used to keep the internal content in the
1713 * document encoding however this turns being unmaintainable
1714 * So xmlGetCharEncodingHandler() will return non-null
1715 * values for this now.
1716 */
1717 if ((ctxt->inputNr == 1) &&
1718 (ctxt->encoding == NULL) &&
1719 (ctxt->input->encoding != NULL)) {
1720 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1721 }
1722 ctxt->charset = enc;
1723 return(0);
1724 case XML_CHAR_ENCODING_2022_JP:
1725 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1727 ctxt->sax->error(ctxt->userData,
1728 "char encoding ISO-2022-JPnot supported\n");
1729 break;
1730 case XML_CHAR_ENCODING_SHIFT_JIS:
1731 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1733 ctxt->sax->error(ctxt->userData,
1734 "char encoding Shift_JIS not supported\n");
1735 break;
1736 case XML_CHAR_ENCODING_EUC_JP:
1737 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1739 ctxt->sax->error(ctxt->userData,
1740 "char encoding EUC-JPnot supported\n");
1741 break;
1742 }
1743 }
1744 if (handler == NULL)
1745 return(-1);
1746 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1747 return(xmlSwitchToEncoding(ctxt, handler));
1748}
1749
1750/**
1751 * xmlSwitchToEncoding:
1752 * @ctxt: the parser context
1753 * @handler: the encoding handler
1754 *
1755 * change the input functions when discovering the character encoding
1756 * of a given entity.
1757 *
1758 * Returns 0 in case of success, -1 otherwise
1759 */
1760int
1761xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1762{
1763 int nbchars;
1764
1765 if (handler != NULL) {
1766 if (ctxt->input != NULL) {
1767 if (ctxt->input->buf != NULL) {
1768 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001769 /*
1770 * Check in case the auto encoding detetection triggered
1771 * in already.
1772 */
Owen Taylor3473f882001-02-23 17:55:21 +00001773 if (ctxt->input->buf->encoder == handler)
1774 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001775
1776 /*
1777 * "UTF-16" can be used for both LE and BE
Daniel Veillard878eab02002-02-19 13:46:09 +00001778 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1779 BAD_CAST "UTF-16", 6)) &&
1780 (!xmlStrncmp(BAD_CAST handler->name,
1781 BAD_CAST "UTF-16", 6))) {
1782 return(0);
1783 }
Daniel Veillarda6874ca2003-07-29 16:47:24 +00001784 */
Daniel Veillard878eab02002-02-19 13:46:09 +00001785
Owen Taylor3473f882001-02-23 17:55:21 +00001786 /*
1787 * Note: this is a bit dangerous, but that's what it
1788 * takes to use nearly compatible signature for different
1789 * encodings.
1790 */
1791 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1792 ctxt->input->buf->encoder = handler;
1793 return(0);
1794 }
1795 ctxt->input->buf->encoder = handler;
1796
1797 /*
1798 * Is there already some content down the pipe to convert ?
1799 */
1800 if ((ctxt->input->buf->buffer != NULL) &&
1801 (ctxt->input->buf->buffer->use > 0)) {
1802 int processed;
1803
1804 /*
1805 * Specific handling of the Byte Order Mark for
1806 * UTF-16
1807 */
1808 if ((handler->name != NULL) &&
1809 (!strcmp(handler->name, "UTF-16LE")) &&
1810 (ctxt->input->cur[0] == 0xFF) &&
1811 (ctxt->input->cur[1] == 0xFE)) {
1812 ctxt->input->cur += 2;
1813 }
1814 if ((handler->name != NULL) &&
1815 (!strcmp(handler->name, "UTF-16BE")) &&
1816 (ctxt->input->cur[0] == 0xFE) &&
1817 (ctxt->input->cur[1] == 0xFF)) {
1818 ctxt->input->cur += 2;
1819 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001820 /*
1821 * Errata on XML-1.0 June 20 2001
1822 * Specific handling of the Byte Order Mark for
1823 * UTF-8
1824 */
1825 if ((handler->name != NULL) &&
1826 (!strcmp(handler->name, "UTF-8")) &&
1827 (ctxt->input->cur[0] == 0xEF) &&
1828 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001829 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001830 ctxt->input->cur += 3;
1831 }
Owen Taylor3473f882001-02-23 17:55:21 +00001832
1833 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001834 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001835 * Move it as the raw buffer and create a new input buffer
1836 */
1837 processed = ctxt->input->cur - ctxt->input->base;
1838 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1839 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1840 ctxt->input->buf->buffer = xmlBufferCreate();
1841
1842 if (ctxt->html) {
1843 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001844 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001845 */
1846 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1847 ctxt->input->buf->buffer,
1848 ctxt->input->buf->raw);
1849 } else {
1850 /*
1851 * convert just enough to get
1852 * '<?xml version="1.0" encoding="xxx"?>'
1853 * parsed with the autodetected encoding
1854 * into the parser reading buffer.
1855 */
1856 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1857 ctxt->input->buf->buffer,
1858 ctxt->input->buf->raw);
1859 }
1860 if (nbchars < 0) {
1861 xmlGenericError(xmlGenericErrorContext,
1862 "xmlSwitchToEncoding: encoder error\n");
1863 return(-1);
1864 }
1865 ctxt->input->base =
1866 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001867 ctxt->input->end =
1868 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001869
1870 }
1871 return(0);
1872 } else {
1873 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1874 /*
1875 * When parsing a static memory array one must know the
1876 * size to be able to convert the buffer.
1877 */
1878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1879 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001880 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001881 return(-1);
1882 } else {
1883 int processed;
1884
1885 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001886 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001887 * Move it as the raw buffer and create a new input buffer
1888 */
1889 processed = ctxt->input->cur - ctxt->input->base;
1890
1891 ctxt->input->buf->raw = xmlBufferCreate();
1892 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1893 ctxt->input->length - processed);
1894 ctxt->input->buf->buffer = xmlBufferCreate();
1895
1896 /*
1897 * convert as much as possible of the raw input
1898 * to the parser reading buffer.
1899 */
1900 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1901 ctxt->input->buf->buffer,
1902 ctxt->input->buf->raw);
1903 if (nbchars < 0) {
1904 xmlGenericError(xmlGenericErrorContext,
1905 "xmlSwitchToEncoding: encoder error\n");
1906 return(-1);
1907 }
1908
1909 /*
1910 * Conversion succeeded, get rid of the old buffer
1911 */
1912 if ((ctxt->input->free != NULL) &&
1913 (ctxt->input->base != NULL))
1914 ctxt->input->free((xmlChar *) ctxt->input->base);
1915 ctxt->input->base =
1916 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001917 ctxt->input->end =
1918 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001919 }
1920 }
1921 } else {
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001924 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001925 return(-1);
1926 }
1927 /*
1928 * The parsing is now done in UTF8 natively
1929 */
1930 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1931 } else
1932 return(-1);
1933 return(0);
1934
1935}
1936
1937/************************************************************************
1938 * *
1939 * Commodity functions to handle entities processing *
1940 * *
1941 ************************************************************************/
1942
1943/**
1944 * xmlFreeInputStream:
1945 * @input: an xmlParserInputPtr
1946 *
1947 * Free up an input stream.
1948 */
1949void
1950xmlFreeInputStream(xmlParserInputPtr input) {
1951 if (input == NULL) return;
1952
1953 if (input->filename != NULL) xmlFree((char *) input->filename);
1954 if (input->directory != NULL) xmlFree((char *) input->directory);
1955 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1956 if (input->version != NULL) xmlFree((char *) input->version);
1957 if ((input->free != NULL) && (input->base != NULL))
1958 input->free((xmlChar *) input->base);
1959 if (input->buf != NULL)
1960 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 xmlFree(input);
1962}
1963
1964/**
1965 * xmlNewInputStream:
1966 * @ctxt: an XML parser context
1967 *
1968 * Create a new input stream structure
1969 * Returns the new input stream or NULL
1970 */
1971xmlParserInputPtr
1972xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1973 xmlParserInputPtr input;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001974 static int id = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001975
1976 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1977 if (input == NULL) {
1978 if (ctxt != NULL) {
1979 ctxt->errNo = XML_ERR_NO_MEMORY;
1980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981 ctxt->sax->error(ctxt->userData,
1982 "malloc: couldn't allocate a new input stream\n");
1983 ctxt->errNo = XML_ERR_NO_MEMORY;
1984 }
1985 return(NULL);
1986 }
1987 memset(input, 0, sizeof(xmlParserInput));
1988 input->line = 1;
1989 input->col = 1;
1990 input->standalone = -1;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001991 /*
1992 * we don't care about thread reentrancy unicity for a single
1993 * parser context (and hence thread) is sufficient.
1994 */
1995 input->id = id++;
Owen Taylor3473f882001-02-23 17:55:21 +00001996 return(input);
1997}
1998
1999/**
2000 * xmlNewIOInputStream:
2001 * @ctxt: an XML parser context
2002 * @input: an I/O Input
2003 * @enc: the charset encoding if known
2004 *
2005 * Create a new input stream structure encapsulating the @input into
2006 * a stream suitable for the parser.
2007 *
2008 * Returns the new input stream or NULL
2009 */
2010xmlParserInputPtr
2011xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
2012 xmlCharEncoding enc) {
2013 xmlParserInputPtr inputStream;
2014
2015 if (xmlParserDebugEntities)
2016 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
2017 inputStream = xmlNewInputStream(ctxt);
2018 if (inputStream == NULL) {
2019 return(NULL);
2020 }
2021 inputStream->filename = NULL;
2022 inputStream->buf = input;
2023 inputStream->base = inputStream->buf->buffer->content;
2024 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002025 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002026 if (enc != XML_CHAR_ENCODING_NONE) {
2027 xmlSwitchEncoding(ctxt, enc);
2028 }
2029
2030 return(inputStream);
2031}
2032
2033/**
2034 * xmlNewEntityInputStream:
2035 * @ctxt: an XML parser context
2036 * @entity: an Entity pointer
2037 *
2038 * Create a new input stream based on an xmlEntityPtr
2039 *
2040 * Returns the new input stream or NULL
2041 */
2042xmlParserInputPtr
2043xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2044 xmlParserInputPtr input;
2045
2046 if (entity == NULL) {
2047 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2049 ctxt->sax->error(ctxt->userData,
2050 "internal: xmlNewEntityInputStream entity = NULL\n");
2051 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2052 return(NULL);
2053 }
2054 if (xmlParserDebugEntities)
2055 xmlGenericError(xmlGenericErrorContext,
2056 "new input from entity: %s\n", entity->name);
2057 if (entity->content == NULL) {
2058 switch (entity->etype) {
2059 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2060 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2062 ctxt->sax->error(ctxt->userData,
2063 "xmlNewEntityInputStream unparsed entity !\n");
2064 break;
2065 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2066 case XML_EXTERNAL_PARAMETER_ENTITY:
2067 return(xmlLoadExternalEntity((char *) entity->URI,
2068 (char *) entity->ExternalID, ctxt));
2069 case XML_INTERNAL_GENERAL_ENTITY:
2070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2071 ctxt->sax->error(ctxt->userData,
2072 "Internal entity %s without content !\n", entity->name);
2073 break;
2074 case XML_INTERNAL_PARAMETER_ENTITY:
2075 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2077 ctxt->sax->error(ctxt->userData,
2078 "Internal parameter entity %s without content !\n", entity->name);
2079 break;
2080 case XML_INTERNAL_PREDEFINED_ENTITY:
2081 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2083 ctxt->sax->error(ctxt->userData,
2084 "Predefined entity %s without content !\n", entity->name);
2085 break;
2086 }
2087 return(NULL);
2088 }
2089 input = xmlNewInputStream(ctxt);
2090 if (input == NULL) {
2091 return(NULL);
2092 }
2093 input->filename = (char *) entity->URI;
2094 input->base = entity->content;
2095 input->cur = entity->content;
2096 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002097 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002098 return(input);
2099}
2100
2101/**
2102 * xmlNewStringInputStream:
2103 * @ctxt: an XML parser context
2104 * @buffer: an memory buffer
2105 *
2106 * Create a new input stream based on a memory buffer.
2107 * Returns the new input stream
2108 */
2109xmlParserInputPtr
2110xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2111 xmlParserInputPtr input;
2112
2113 if (buffer == NULL) {
2114 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2116 ctxt->sax->error(ctxt->userData,
2117 "internal: xmlNewStringInputStream string = NULL\n");
2118 return(NULL);
2119 }
2120 if (xmlParserDebugEntities)
2121 xmlGenericError(xmlGenericErrorContext,
2122 "new fixed input: %.30s\n", buffer);
2123 input = xmlNewInputStream(ctxt);
2124 if (input == NULL) {
2125 return(NULL);
2126 }
2127 input->base = buffer;
2128 input->cur = buffer;
2129 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002130 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002131 return(input);
2132}
2133
2134/**
2135 * xmlNewInputFromFile:
2136 * @ctxt: an XML parser context
2137 * @filename: the filename to use as entity
2138 *
2139 * Create a new input stream based on a file.
2140 *
2141 * Returns the new input stream or NULL in case of error
2142 */
2143xmlParserInputPtr
2144xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2145 xmlParserInputBufferPtr buf;
2146 xmlParserInputPtr inputStream;
2147 char *directory = NULL;
2148 xmlChar *URI = NULL;
2149
2150 if (xmlParserDebugEntities)
2151 xmlGenericError(xmlGenericErrorContext,
2152 "new input from file: %s\n", filename);
2153 if (ctxt == NULL) return(NULL);
2154 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2155 if (buf == NULL)
2156 return(NULL);
2157
2158 URI = xmlStrdup((xmlChar *) filename);
2159 directory = xmlParserGetDirectory((const char *) URI);
2160
2161 inputStream = xmlNewInputStream(ctxt);
2162 if (inputStream == NULL) {
2163 if (directory != NULL) xmlFree((char *) directory);
2164 if (URI != NULL) xmlFree((char *) URI);
2165 return(NULL);
2166 }
2167
Daniel Veillard8d8bf2c2003-09-17 19:36:25 +00002168 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
Daniel Veillarda66b1d12003-09-17 20:54:38 +00002169 if (URI != NULL) xmlFree((char *) URI);
Owen Taylor3473f882001-02-23 17:55:21 +00002170 inputStream->directory = directory;
2171 inputStream->buf = buf;
2172
2173 inputStream->base = inputStream->buf->buffer->content;
2174 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002175 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002176 if ((ctxt->directory == NULL) && (directory != NULL))
2177 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2178 return(inputStream);
2179}
2180
2181/************************************************************************
2182 * *
2183 * Commodity functions to handle parser contexts *
2184 * *
2185 ************************************************************************/
2186
2187/**
2188 * xmlInitParserCtxt:
2189 * @ctxt: an XML parser context
2190 *
2191 * Initialize a parser context
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002192 *
2193 * Returns 0 in case of success and -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00002194 */
2195
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002196int
Owen Taylor3473f882001-02-23 17:55:21 +00002197xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2198{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002199 if(ctxt==NULL) {
2200 xmlGenericError(xmlGenericErrorContext,
2201 "xmlInitParserCtxt: NULL context given\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002202 return(-1);
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002203 }
2204
Owen Taylor3473f882001-02-23 17:55:21 +00002205 xmlDefaultSAXHandlerInit();
2206
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002207 ctxt->dict = xmlDictCreate();
2208 if (ctxt->dict == NULL) {
2209 xmlGenericError(xmlGenericErrorContext,
2210 "xmlInitParserCtxt: out of memory\n");
2211 return(-1);
2212 }
William M. Brack8b2c7f12002-11-22 05:07:29 +00002213 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2214 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002215 xmlGenericError(xmlGenericErrorContext,
2216 "xmlInitParserCtxt: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002217 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002218 }
2219 else
Daniel Veillard092643b2003-09-25 14:29:29 +00002220 xmlSAXVersion(ctxt->sax, 2);
Owen Taylor3473f882001-02-23 17:55:21 +00002221
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002222 ctxt->maxatts = 0;
2223 ctxt->atts = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002224 /* Allocate the Input stack */
2225 ctxt->inputTab = (xmlParserInputPtr *)
2226 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2227 if (ctxt->inputTab == NULL) {
2228 xmlGenericError(xmlGenericErrorContext,
2229 "xmlInitParserCtxt: out of memory\n");
2230 ctxt->inputNr = 0;
2231 ctxt->inputMax = 0;
2232 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002233 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002234 }
2235 ctxt->inputNr = 0;
2236 ctxt->inputMax = 5;
2237 ctxt->input = NULL;
2238
2239 ctxt->version = NULL;
2240 ctxt->encoding = NULL;
2241 ctxt->standalone = -1;
2242 ctxt->hasExternalSubset = 0;
2243 ctxt->hasPErefs = 0;
2244 ctxt->html = 0;
2245 ctxt->external = 0;
2246 ctxt->instate = XML_PARSER_START;
2247 ctxt->token = 0;
2248 ctxt->directory = NULL;
2249
2250 /* Allocate the Node stack */
2251 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2252 if (ctxt->nodeTab == NULL) {
2253 xmlGenericError(xmlGenericErrorContext,
2254 "xmlInitParserCtxt: out of memory\n");
2255 ctxt->nodeNr = 0;
2256 ctxt->nodeMax = 0;
2257 ctxt->node = NULL;
2258 ctxt->inputNr = 0;
2259 ctxt->inputMax = 0;
2260 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002261 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002262 }
2263 ctxt->nodeNr = 0;
2264 ctxt->nodeMax = 10;
2265 ctxt->node = NULL;
2266
2267 /* Allocate the Name stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002268 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00002269 if (ctxt->nameTab == NULL) {
2270 xmlGenericError(xmlGenericErrorContext,
2271 "xmlInitParserCtxt: out of memory\n");
2272 ctxt->nodeNr = 0;
2273 ctxt->nodeMax = 0;
2274 ctxt->node = NULL;
2275 ctxt->inputNr = 0;
2276 ctxt->inputMax = 0;
2277 ctxt->input = NULL;
2278 ctxt->nameNr = 0;
2279 ctxt->nameMax = 0;
2280 ctxt->name = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002281 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002282 }
2283 ctxt->nameNr = 0;
2284 ctxt->nameMax = 10;
2285 ctxt->name = NULL;
2286
2287 /* Allocate the space stack */
2288 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2289 if (ctxt->spaceTab == NULL) {
2290 xmlGenericError(xmlGenericErrorContext,
2291 "xmlInitParserCtxt: out of memory\n");
2292 ctxt->nodeNr = 0;
2293 ctxt->nodeMax = 0;
2294 ctxt->node = NULL;
2295 ctxt->inputNr = 0;
2296 ctxt->inputMax = 0;
2297 ctxt->input = NULL;
2298 ctxt->nameNr = 0;
2299 ctxt->nameMax = 0;
2300 ctxt->name = NULL;
2301 ctxt->spaceNr = 0;
2302 ctxt->spaceMax = 0;
2303 ctxt->space = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002304 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002305 }
2306 ctxt->spaceNr = 1;
2307 ctxt->spaceMax = 10;
2308 ctxt->spaceTab[0] = -1;
2309 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002310 ctxt->userData = ctxt;
2311 ctxt->myDoc = NULL;
2312 ctxt->wellFormed = 1;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00002313 ctxt->nsWellFormed = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002314 ctxt->valid = 1;
2315 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2316 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2317 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002318 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002319 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002320 if (ctxt->keepBlanks == 0)
Daniel Veillard11476b42003-09-26 14:51:39 +00002321 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002322
Owen Taylor3473f882001-02-23 17:55:21 +00002323 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002324 ctxt->vctxt.error = xmlParserValidityError;
2325 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002326 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002327 if (xmlGetWarningsDefaultValue == 0)
2328 ctxt->vctxt.warning = NULL;
2329 else
2330 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002331 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002332 }
2333 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2334 ctxt->record_info = 0;
2335 ctxt->nbChars = 0;
2336 ctxt->checkIndex = 0;
2337 ctxt->inSubset = 0;
2338 ctxt->errNo = XML_ERR_OK;
2339 ctxt->depth = 0;
2340 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002341 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002342 xmlInitNodeInfoSeq(&ctxt->node_seq);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002343 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002344}
2345
2346/**
2347 * xmlFreeParserCtxt:
2348 * @ctxt: an XML parser context
2349 *
2350 * Free all the memory used by a parser context. However the parsed
2351 * document in ctxt->myDoc is not freed.
2352 */
2353
2354void
2355xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2356{
2357 xmlParserInputPtr input;
Owen Taylor3473f882001-02-23 17:55:21 +00002358
2359 if (ctxt == NULL) return;
2360
2361 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2362 xmlFreeInputStream(input);
2363 }
Owen Taylor3473f882001-02-23 17:55:21 +00002364 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
Igor Zlatkovicd37c1392003-08-28 10:34:33 +00002365 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
Owen Taylor3473f882001-02-23 17:55:21 +00002366 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2367 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2368 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2369 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00002370 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2371 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Daniel Veillard81273902003-09-30 00:43:48 +00002372#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00002373 if ((ctxt->sax != NULL) &&
2374 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
Daniel Veillard81273902003-09-30 00:43:48 +00002375#else
2376 if (ctxt->sax != NULL)
2377#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002378 xmlFree(ctxt->sax);
2379 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002380 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Igor Zlatkovicd37c1392003-08-28 10:34:33 +00002381 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002382 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002383 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002384 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2385 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2386 if (ctxt->attsDefault != NULL)
2387 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002388 if (ctxt->attsSpecial != NULL)
2389 xmlHashFree(ctxt->attsSpecial, NULL);
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00002390 if (ctxt->freeElems != NULL) {
2391 xmlNodePtr cur, next;
2392
2393 cur = ctxt->freeElems;
2394 while (cur != NULL) {
2395 next = cur->next;
2396 xmlFree(cur);
2397 cur = next;
2398 }
2399 }
2400 if (ctxt->freeAttrs != NULL) {
2401 xmlAttrPtr cur, next;
2402
2403 cur = ctxt->freeAttrs;
2404 while (cur != NULL) {
2405 next = cur->next;
2406 xmlFree(cur);
2407 cur = next;
2408 }
2409 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00002410
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002411#ifdef LIBXML_CATALOG_ENABLED
2412 if (ctxt->catalogs != NULL)
2413 xmlCatalogFreeLocal(ctxt->catalogs);
2414#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002415 xmlFree(ctxt);
2416}
2417
2418/**
2419 * xmlNewParserCtxt:
2420 *
2421 * Allocate and initialize a new parser context.
2422 *
2423 * Returns the xmlParserCtxtPtr or NULL
2424 */
2425
2426xmlParserCtxtPtr
2427xmlNewParserCtxt()
2428{
2429 xmlParserCtxtPtr ctxt;
2430
2431 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2432 if (ctxt == NULL) {
2433 xmlGenericError(xmlGenericErrorContext,
2434 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002435 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002436 return(NULL);
2437 }
2438 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002439 if (xmlInitParserCtxt(ctxt) < 0) {
2440 xmlFreeParserCtxt(ctxt);
2441 return(NULL);
2442 }
Owen Taylor3473f882001-02-23 17:55:21 +00002443 return(ctxt);
2444}
2445
2446/************************************************************************
2447 * *
2448 * Handling of node informations *
2449 * *
2450 ************************************************************************/
2451
2452/**
2453 * xmlClearParserCtxt:
2454 * @ctxt: an XML parser context
2455 *
2456 * Clear (release owned resources) and reinitialize a parser context
2457 */
2458
2459void
2460xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2461{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002462 if (ctxt==NULL)
2463 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002464 xmlClearNodeInfoSeq(&ctxt->node_seq);
2465 xmlInitParserCtxt(ctxt);
2466}
2467
2468/**
2469 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002470 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002471 * @node: an XML node within the tree
2472 *
2473 * Find the parser node info struct for a given node
2474 *
2475 * Returns an xmlParserNodeInfo block pointer or NULL
2476 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002477const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2478 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002479{
2480 unsigned long pos;
2481
2482 /* Find position where node should be at */
2483 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002484 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return &ctx->node_seq.buffer[pos];
2486 else
2487 return NULL;
2488}
2489
2490
2491/**
2492 * xmlInitNodeInfoSeq:
2493 * @seq: a node info sequence pointer
2494 *
2495 * -- Initialize (set to initial state) node info sequence
2496 */
2497void
2498xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2499{
2500 seq->length = 0;
2501 seq->maximum = 0;
2502 seq->buffer = NULL;
2503}
2504
2505/**
2506 * xmlClearNodeInfoSeq:
2507 * @seq: a node info sequence pointer
2508 *
2509 * -- Clear (release memory and reinitialize) node
2510 * info sequence
2511 */
2512void
2513xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2514{
2515 if ( seq->buffer != NULL )
2516 xmlFree(seq->buffer);
2517 xmlInitNodeInfoSeq(seq);
2518}
2519
2520
2521/**
2522 * xmlParserFindNodeInfoIndex:
2523 * @seq: a node info sequence pointer
2524 * @node: an XML node pointer
2525 *
2526 *
2527 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2528 * the given node is or should be at in a sorted sequence
2529 *
2530 * Returns a long indicating the position of the record
2531 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002532unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2533 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002534{
2535 unsigned long upper, lower, middle;
2536 int found = 0;
2537
2538 /* Do a binary search for the key */
2539 lower = 1;
2540 upper = seq->length;
2541 middle = 0;
2542 while ( lower <= upper && !found) {
2543 middle = lower + (upper - lower) / 2;
2544 if ( node == seq->buffer[middle - 1].node )
2545 found = 1;
2546 else if ( node < seq->buffer[middle - 1].node )
2547 upper = middle - 1;
2548 else
2549 lower = middle + 1;
2550 }
2551
2552 /* Return position */
2553 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2554 return middle;
2555 else
2556 return middle - 1;
2557}
2558
2559
2560/**
2561 * xmlParserAddNodeInfo:
2562 * @ctxt: an XML parser context
2563 * @info: a node info sequence pointer
2564 *
2565 * Insert node info record into the sorted sequence
2566 */
2567void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002568xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002569 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002570{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002571 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002572
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002573 /* Find pos and check to see if node is already in the sequence */
William M. Brack78637da2003-07-31 14:47:38 +00002574 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002575 info->node);
2576 if (pos < ctxt->node_seq.length
2577 && ctxt->node_seq.buffer[pos].node == info->node) {
2578 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002579 }
2580
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002581 /* Otherwise, we need to add new node to buffer */
2582 else {
2583 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2584 xmlParserNodeInfo *tmp_buffer;
2585 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002586
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002587 if (ctxt->node_seq.maximum == 0)
2588 ctxt->node_seq.maximum = 2;
2589 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2590 (2 * ctxt->node_seq.maximum));
2591
2592 if (ctxt->node_seq.buffer == NULL)
Daniel Veillardc4f65ab2003-04-21 23:07:45 +00002593 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002594 else
2595 tmp_buffer =
2596 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2597 byte_size);
2598
2599 if (tmp_buffer == NULL) {
2600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2601 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2602 ctxt->errNo = XML_ERR_NO_MEMORY;
2603 return;
2604 }
2605 ctxt->node_seq.buffer = tmp_buffer;
2606 ctxt->node_seq.maximum *= 2;
2607 }
2608
2609 /* If position is not at end, move elements out of the way */
2610 if (pos != ctxt->node_seq.length) {
2611 unsigned long i;
2612
2613 for (i = ctxt->node_seq.length; i > pos; i--)
2614 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2615 }
2616
2617 /* Copy element and increase length */
2618 ctxt->node_seq.buffer[pos] = *info;
2619 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002620 }
Owen Taylor3473f882001-02-23 17:55:21 +00002621}
2622
/************************************************************************
 *                                                                      *
 *              Default settings                                        *
 *                                                                      *
 ************************************************************************/
2628/**
2629 * xmlPedanticParserDefault:
2630 * @val: int 0 or 1
2631 *
2632 * Set and return the previous value for enabling pedantic warnings.
2633 *
2634 * Returns the last value for 0 for no substitution, 1 for substitution.
2635 */
2636
2637int
2638xmlPedanticParserDefault(int val) {
2639 int old = xmlPedanticParserDefaultValue;
2640
2641 xmlPedanticParserDefaultValue = val;
2642 return(old);
2643}
2644
2645/**
2646 * xmlLineNumbersDefault:
2647 * @val: int 0 or 1
2648 *
2649 * Set and return the previous value for enabling line numbers in elements
2650 * contents. This may break on old application and is turned off by default.
2651 *
2652 * Returns the last value for 0 for no substitution, 1 for substitution.
2653 */
2654
2655int
2656xmlLineNumbersDefault(int val) {
2657 int old = xmlLineNumbersDefaultValue;
2658
2659 xmlLineNumbersDefaultValue = val;
2660 return(old);
2661}
2662
2663/**
2664 * xmlSubstituteEntitiesDefault:
2665 * @val: int 0 or 1
2666 *
2667 * Set and return the previous value for default entity support.
2668 * Initially the parser always keep entity references instead of substituting
2669 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002670 * default parser behavior
2671 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002672 * file basis.
2673 *
2674 * Returns the last value for 0 for no substitution, 1 for substitution.
2675 */
2676
2677int
2678xmlSubstituteEntitiesDefault(int val) {
2679 int old = xmlSubstituteEntitiesDefaultValue;
2680
2681 xmlSubstituteEntitiesDefaultValue = val;
2682 return(old);
2683}
2684
2685/**
2686 * xmlKeepBlanksDefault:
2687 * @val: int 0 or 1
2688 *
2689 * Set and return the previous value for default blanks text nodes support.
2690 * The 1.x version of the parser used an heuristic to try to detect
2691 * ignorable white spaces. As a result the SAX callback was generating
Daniel Veillard11476b42003-09-26 14:51:39 +00002692 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
Daniel Veillarda53c6882001-07-25 17:18:57 +00002693 * using the DOM output text nodes containing those blanks were not generated.
2694 * The 2.x and later version will switch to the XML standard way and
2695 * ignorableWhitespace() are only generated when running the parser in
2696 * validating mode and when the current element doesn't allow CDATA or
2697 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002698 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002699 * on 1.X libs and to switch back to the old mode for compatibility when
2700 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2701 * by using xmlIsBlankNode() commodity function to detect the "empty"
2702 * nodes generated.
2703 * This value also affect autogeneration of indentation when saving code
2704 * if blanks sections are kept, indentation is not generated.
2705 *
2706 * Returns the last value for 0 for no substitution, 1 for substitution.
2707 */
2708
2709int
2710xmlKeepBlanksDefault(int val) {
2711 int old = xmlKeepBlanksDefaultValue;
2712
2713 xmlKeepBlanksDefaultValue = val;
2714 xmlIndentTreeOutput = !val;
2715 return(old);
2716}
2717