blob: 1e2777d7d45ea3a2e39cf1b2acd2374003d01362 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
Daniel Veillarda53c6882001-07-25 17:18:57 +000058/*
59 * Various global defaults for parsing
60 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Feature names handed out by xmlGetFeaturesList(), in the order in
 * which they are reported.
 */
static const char *xmlFeaturesList[] = {
    /* parser settings and state */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the total
 *         number of features; *@len is lowered to the number of strings
 *         copied when it was larger than that total.
 *         The strings are static and must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    /* pure query: report how many names exist */
    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        wanted = total;
        *len = wanted;
    }
    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
373/************************************************************************
374 * *
375 * Some functions to avoid too large macros *
376 * *
377 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through here and are rejected */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
399
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
414
/*
 * Lookup table for the BaseChar production restricted to the code
 * points 0x00 - 0xFF: entry c is 1 when c is a BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray table;
 * negative values are rejected before the lookup so that they can never
 * be used as an index. Larger code points are tested against the ranges
 * of the production, split in three groups so that a value is only
 * compared against the ranges which can contain it.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* not a code point at all, and must not index the table */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* 0x0100 - 0x0904: every BaseChar here is at or below 0x06E6 */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* 0x0905 - 0x109F: every BaseChar here is at or below 0x0F69 */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* 0x10A0 and above */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
653
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] ranges of the production, ascending */
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 }
    };
    const int nbRanges =
        (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* the table is sorted: nothing further can match */
        if (c < digitRanges[idx][0])
            return(0);
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
683
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * inclusive [first, last] ranges of the production, in ascending
     * order; single code points are stored as one-element ranges
     */
    static const int combiningRanges[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },
        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },
        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },
        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    const int nbRanges =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* the table is sorted: nothing further can match */
        if (c < combiningRanges[idx][0])
            return(0);
        if (c <= combiningRanges[idx][1])
            return(1);
    }
    return(0);
}
796
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Tell whether @c belongs to the Extender character class, i.e. matches
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x309D) && (c <= 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* the isolated code points */
    return((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
           (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
           (c == 0x0EC6) || (c == 0x3005));
}
821
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; in
 * particular the range #xF900-#xFA2D is not part of it and is rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast path: nothing below 0x100 is ideographic */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
839
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Tell whether @c is a Letter, i.e. matches the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Tell whether @c may appear in a public identifier, i.e. matches
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* white space and the punctuation allowed by the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
888/* we need to keep enough input to show errors in context */
889#define LINE_LEN 80
890
891#ifdef DEBUG_INPUT
892#define CHECK_BUFFER(in) check_buffer(in)
893
Daniel Veillard01c13b52002-12-10 15:19:08 +0000894static
Owen Taylor3473f882001-02-23 17:55:21 +0000895void check_buffer(xmlParserInputPtr in) {
896 if (in->base != in->buf->buffer->content) {
897 xmlGenericError(xmlGenericErrorContext,
898 "xmlParserInput: base mismatch problem\n");
899 }
900 if (in->cur < in->base) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: cur < base problem\n");
903 }
904 if (in->cur > in->base + in->buf->buffer->use) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur > base + use problem\n");
907 }
908 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
909 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
910 in->buf->buffer->use, in->buf->buffer->size);
911}
912
913#else
914#define CHECK_BUFFER(in)
915#endif
916
917
918/**
919 * xmlParserInputRead:
920 * @in: an XML parser input
921 * @len: an indicative size for the lookahead
922 *
923 * This function refresh the input for the parser. It doesn't try to
924 * preserve pointers to the input buffer, and discard already read data
925 *
926 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
927 * end of this entity
928 */
929int
930xmlParserInputRead(xmlParserInputPtr in, int len) {
931 int ret;
932 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000933 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000934
935#ifdef DEBUG_INPUT
936 xmlGenericError(xmlGenericErrorContext, "Read\n");
937#endif
938 if (in->buf == NULL) return(-1);
939 if (in->base == NULL) return(-1);
940 if (in->cur == NULL) return(-1);
941 if (in->buf->buffer == NULL) return(-1);
942 if (in->buf->readcallback == NULL) return(-1);
943
944 CHECK_BUFFER(in);
945
946 used = in->cur - in->buf->buffer->content;
947 ret = xmlBufferShrink(in->buf->buffer, used);
948 if (ret > 0) {
949 in->cur -= ret;
950 in->consumed += ret;
951 }
952 ret = xmlParserInputBufferRead(in->buf, len);
953 if (in->base != in->buf->buffer->content) {
954 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000955 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000956 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000957 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000958 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000959 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000960 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000961 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000962
963 CHECK_BUFFER(in);
964
965 return(ret);
966}
967
968/**
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
972 *
973 * This function increase the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keep already read data
975 *
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
977 * end of this entity
978 */
979int
980xmlParserInputGrow(xmlParserInputPtr in, int len) {
981 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000982 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000983
984#ifdef DEBUG_INPUT
985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
986#endif
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
991
992 CHECK_BUFFER(in);
993
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000994 indx = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000996
997 CHECK_BUFFER(in);
998
999 return(0);
1000 }
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1003 else
1004 return(0);
1005
1006 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001007 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001008 * block, but we use it really as an integer to do some
1009 * pointer arithmetic. Insure will raise it as a bug but in
1010 * that specific case, that's not !
1011 */
1012 if (in->base != in->buf->buffer->content) {
1013 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001015 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001016 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001017 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001018 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001019 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001020 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001021
1022 CHECK_BUFFER(in);
1023
1024 return(ret);
1025}
1026
1027/**
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1030 *
1031 * This function removes used input for the parser.
1032 */
1033void
1034xmlParserInputShrink(xmlParserInputPtr in) {
1035 int used;
1036 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001037 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001038
1039#ifdef DEBUG_INPUT
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1041#endif
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1046
1047 CHECK_BUFFER(in);
1048
1049 used = in->cur - in->buf->buffer->content;
1050 /*
1051 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001052 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001053 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001054#if 0
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001055 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001056 return;
Daniel Veillarda880b122003-04-21 21:36:41 +00001057#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001058 if (used > INPUT_CHUNK) {
1059 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1060 if (ret > 0) {
1061 in->cur -= ret;
1062 in->consumed += ret;
1063 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001064 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001065 }
1066
1067 CHECK_BUFFER(in);
1068
1069 if (in->buf->buffer->use > INPUT_CHUNK) {
1070 return;
1071 }
1072 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1073 if (in->base != in->buf->buffer->content) {
1074 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001075 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001076 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001077 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001078 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001080 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001081 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001082
1083 CHECK_BUFFER(in);
1084}
1085
1086/************************************************************************
1087 * *
1088 * UTF8 character input and related functions *
1089 * *
1090 ************************************************************************/
1091
1092/**
1093 * xmlNextChar:
1094 * @ctxt: the XML parser context
1095 *
1096 * Skip to the next char input char.
1097 */
1098
1099void
Daniel Veillard77a90a72003-03-22 00:04:05 +00001100xmlNextChar(xmlParserCtxtPtr ctxt)
1101{
Owen Taylor3473f882001-02-23 17:55:21 +00001102 if (ctxt->instate == XML_PARSER_EOF)
Daniel Veillard77a90a72003-03-22 00:04:05 +00001103 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001104
Daniel Veillardfdc91562002-07-01 21:52:03 +00001105 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001106 if ((*ctxt->input->cur == 0) &&
1107 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1108 (ctxt->instate != XML_PARSER_COMMENT)) {
1109 /*
1110 * If we are at the end of the current entity and
1111 * the context allows it, we pop consumed entities
1112 * automatically.
1113 * the auto closing should be blocked in other cases
1114 */
1115 xmlPopInput(ctxt);
1116 } else {
1117 const unsigned char *cur;
1118 unsigned char c;
Owen Taylor3473f882001-02-23 17:55:21 +00001119
Daniel Veillard77a90a72003-03-22 00:04:05 +00001120 /*
1121 * 2.11 End-of-Line Handling
1122 * the literal two-character sequence "#xD#xA" or a standalone
1123 * literal #xD, an XML processor must pass to the application
1124 * the single character #xA.
1125 */
1126 if (*(ctxt->input->cur) == '\n') {
1127 ctxt->input->line++;
1128 ctxt->input->col = 1;
1129 } else
1130 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001131
Daniel Veillard77a90a72003-03-22 00:04:05 +00001132 /*
1133 * We are supposed to handle UTF8, check it's valid
1134 * From rfc2044: encoding of the Unicode values on UTF-8:
1135 *
1136 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1137 * 0000 0000-0000 007F 0xxxxxxx
1138 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1139 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1140 *
1141 * Check for the 0x110000 limit too
1142 */
1143 cur = ctxt->input->cur;
1144
1145 c = *cur;
1146 if (c & 0x80) {
1147 if (cur[1] == 0)
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if ((cur[1] & 0xc0) != 0x80)
1150 goto encoding_error;
1151 if ((c & 0xe0) == 0xe0) {
1152 unsigned int val;
1153
1154 if (cur[2] == 0)
1155 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1156 if ((cur[2] & 0xc0) != 0x80)
1157 goto encoding_error;
1158 if ((c & 0xf0) == 0xf0) {
1159 if (cur[3] == 0)
1160 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1161 if (((c & 0xf8) != 0xf0) ||
1162 ((cur[3] & 0xc0) != 0x80))
1163 goto encoding_error;
1164 /* 4-byte code */
1165 ctxt->input->cur += 4;
1166 val = (cur[0] & 0x7) << 18;
1167 val |= (cur[1] & 0x3f) << 12;
1168 val |= (cur[2] & 0x3f) << 6;
1169 val |= cur[3] & 0x3f;
1170 } else {
1171 /* 3-byte code */
1172 ctxt->input->cur += 3;
1173 val = (cur[0] & 0xf) << 12;
1174 val |= (cur[1] & 0x3f) << 6;
1175 val |= cur[2] & 0x3f;
1176 }
1177 if (((val > 0xd7ff) && (val < 0xe000)) ||
1178 ((val > 0xfffd) && (val < 0x10000)) ||
1179 (val >= 0x110000)) {
1180 if ((ctxt->sax != NULL) &&
1181 (ctxt->sax->error != NULL))
1182 ctxt->sax->error(ctxt->userData,
1183 "Char 0x%X out of allowed range\n",
1184 val);
1185 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1186 ctxt->wellFormed = 0;
1187 if (ctxt->recovery == 0)
1188 ctxt->disableSAX = 1;
1189 }
1190 } else
1191 /* 2-byte code */
1192 ctxt->input->cur += 2;
1193 } else
1194 /* 1-byte code */
1195 ctxt->input->cur++;
1196
1197 ctxt->nbChars++;
1198 if (*ctxt->input->cur == 0)
1199 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1200 }
Owen Taylor3473f882001-02-23 17:55:21 +00001201 } else {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001202 /*
1203 * Assume it's a fixed length encoding (1) with
1204 * a compatible encoding for the ASCII set, since
1205 * XML constructs only use < 128 chars
1206 */
1207
1208 if (*(ctxt->input->cur) == '\n') {
1209 ctxt->input->line++;
1210 ctxt->input->col = 1;
1211 } else
1212 ctxt->input->col++;
1213 ctxt->input->cur++;
1214 ctxt->nbChars++;
1215 if (*ctxt->input->cur == 0)
1216 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001217 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001218 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001219 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001220 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001221 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001222 xmlPopInput(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001223 return;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001224 encoding_error:
Owen Taylor3473f882001-02-23 17:55:21 +00001225 /*
1226 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001227 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001228 * declaration header. Report the error and switch the encoding
1229 * to ISO-Latin-1 (if you don't like this policy, just declare the
1230 * encoding !)
1231 */
1232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001233 ctxt->sax->error(ctxt->userData,
1234 "Input is not proper UTF-8, indicate encoding !\n");
1235 ctxt->sax->error(ctxt->userData,
1236 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1237 ctxt->input->cur[0], ctxt->input->cur[1],
1238 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001239 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001240 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001241 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1242
Daniel Veillard77a90a72003-03-22 00:04:05 +00001243 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001244 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001245 return;
1246}
1247
1248/**
1249 * xmlCurrentChar:
1250 * @ctxt: the XML parser context
1251 * @len: pointer to the length of the char read
1252 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001253 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001254 * bytes in the input buffer. Implement the end of line normalization:
1255 * 2.11 End-of-Line Handling
1256 * Wherever an external parsed entity or the literal entity value
1257 * of an internal parsed entity contains either the literal two-character
1258 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1259 * must pass to the application the single character #xA.
1260 * This behavior can conveniently be produced by normalizing all
1261 * line breaks to #xA on input, before parsing.)
1262 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001263 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001264 */
1265
1266int
1267xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1268 if (ctxt->instate == XML_PARSER_EOF)
1269 return(0);
1270
Daniel Veillard561b7f82002-03-20 21:55:57 +00001271 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1272 *len = 1;
1273 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001274 }
1275 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1276 /*
1277 * We are supposed to handle UTF8, check it's valid
1278 * From rfc2044: encoding of the Unicode values on UTF-8:
1279 *
1280 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1281 * 0000 0000-0000 007F 0xxxxxxx
1282 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1283 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1284 *
1285 * Check for the 0x110000 limit too
1286 */
1287 const unsigned char *cur = ctxt->input->cur;
1288 unsigned char c;
1289 unsigned int val;
1290
1291 c = *cur;
1292 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001293 if (cur[1] == 0)
1294 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1295 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001296 goto encoding_error;
1297 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001298
1299 if (cur[2] == 0)
1300 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1301 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001302 goto encoding_error;
1303 if ((c & 0xf0) == 0xf0) {
1304 if (cur[3] == 0)
1305 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001306 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001307 ((cur[3] & 0xc0) != 0x80))
1308 goto encoding_error;
1309 /* 4-byte code */
1310 *len = 4;
1311 val = (cur[0] & 0x7) << 18;
1312 val |= (cur[1] & 0x3f) << 12;
1313 val |= (cur[2] & 0x3f) << 6;
1314 val |= cur[3] & 0x3f;
1315 } else {
1316 /* 3-byte code */
1317 *len = 3;
1318 val = (cur[0] & 0xf) << 12;
1319 val |= (cur[1] & 0x3f) << 6;
1320 val |= cur[2] & 0x3f;
1321 }
1322 } else {
1323 /* 2-byte code */
1324 *len = 2;
1325 val = (cur[0] & 0x1f) << 6;
1326 val |= cur[1] & 0x3f;
1327 }
1328 if (!IS_CHAR(val)) {
1329 if ((ctxt->sax != NULL) &&
1330 (ctxt->sax->error != NULL))
1331 ctxt->sax->error(ctxt->userData,
1332 "Char 0x%X out of allowed range\n", val);
1333 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1334 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001335 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001336 }
1337 return(val);
1338 } else {
1339 /* 1-byte code */
1340 *len = 1;
1341 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001342 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001343 ctxt->nbChars++;
1344 ctxt->input->cur++;
1345 }
1346 return(0xA);
1347 }
1348 return((int) *ctxt->input->cur);
1349 }
1350 }
1351 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001352 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001353 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001354 * XML constructs only use < 128 chars
1355 */
1356 *len = 1;
1357 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001358 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001359 ctxt->nbChars++;
1360 ctxt->input->cur++;
1361 }
1362 return(0xA);
1363 }
1364 return((int) *ctxt->input->cur);
1365encoding_error:
1366 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001367 * An encoding problem may arise from a truncated input buffer
1368 * splitting a character in the middle. In that case do not raise
1369 * an error but return 0 to endicate an end of stream problem
1370 */
1371 if (ctxt->input->end - ctxt->input->cur < 4) {
1372 *len = 0;
1373 return(0);
1374 }
1375
1376 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001377 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001378 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001379 * declaration header. Report the error and switch the encoding
1380 * to ISO-Latin-1 (if you don't like this policy, just declare the
1381 * encoding !)
1382 */
1383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1384 ctxt->sax->error(ctxt->userData,
1385 "Input is not proper UTF-8, indicate encoding !\n");
1386 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001387 ctxt->input->cur[0], ctxt->input->cur[1],
1388 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001389 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001390 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001391 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1392
1393 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1394 *len = 1;
1395 return((int) *ctxt->input->cur);
1396}
1397
1398/**
1399 * xmlStringCurrentChar:
1400 * @ctxt: the XML parser context
1401 * @cur: pointer to the beginning of the char
1402 * @len: pointer to the length of the char read
1403 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001404 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001405 * bytes in the input buffer.
1406 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001407 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001408 */
1409
1410int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001411xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1412{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001413 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001414 /*
1415 * We are supposed to handle UTF8, check it's valid
1416 * From rfc2044: encoding of the Unicode values on UTF-8:
1417 *
1418 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1419 * 0000 0000-0000 007F 0xxxxxxx
1420 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1421 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1422 *
1423 * Check for the 0x110000 limit too
1424 */
1425 unsigned char c;
1426 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001427
Daniel Veillardd8224e02002-01-13 15:43:22 +00001428 c = *cur;
1429 if (c & 0x80) {
1430 if ((cur[1] & 0xc0) != 0x80)
1431 goto encoding_error;
1432 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001433
Daniel Veillardd8224e02002-01-13 15:43:22 +00001434 if ((cur[2] & 0xc0) != 0x80)
1435 goto encoding_error;
1436 if ((c & 0xf0) == 0xf0) {
1437 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1438 goto encoding_error;
1439 /* 4-byte code */
1440 *len = 4;
1441 val = (cur[0] & 0x7) << 18;
1442 val |= (cur[1] & 0x3f) << 12;
1443 val |= (cur[2] & 0x3f) << 6;
1444 val |= cur[3] & 0x3f;
1445 } else {
1446 /* 3-byte code */
1447 *len = 3;
1448 val = (cur[0] & 0xf) << 12;
1449 val |= (cur[1] & 0x3f) << 6;
1450 val |= cur[2] & 0x3f;
1451 }
1452 } else {
1453 /* 2-byte code */
1454 *len = 2;
1455 val = (cur[0] & 0x1f) << 6;
1456 val |= cur[1] & 0x3f;
1457 }
1458 if (!IS_CHAR(val)) {
1459 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1460 (ctxt->sax->error != NULL))
1461 ctxt->sax->error(ctxt->userData,
1462 "Char 0x%X out of allowed range\n",
1463 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001464 if (ctxt != NULL) {
1465 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1466 ctxt->wellFormed = 0;
1467 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1468 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001469 }
1470 return (val);
1471 } else {
1472 /* 1-byte code */
1473 *len = 1;
1474 return ((int) *cur);
1475 }
Owen Taylor3473f882001-02-23 17:55:21 +00001476 }
1477 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001478 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001479 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001480 * XML constructs only use < 128 chars
1481 */
1482 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001483 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001484encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001485
Owen Taylor3473f882001-02-23 17:55:21 +00001486 /*
1487 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001488 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001489 * declaration header. Report the error and switch the encoding
1490 * to ISO-Latin-1 (if you don't like this policy, just declare the
1491 * encoding !)
1492 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001493 if (ctxt != NULL) {
1494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1495 ctxt->sax->error(ctxt->userData,
1496 "Input is not proper UTF-8, indicate encoding !\n");
1497 ctxt->sax->error(ctxt->userData,
1498 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1499 ctxt->input->cur[0], ctxt->input->cur[1],
1500 ctxt->input->cur[2], ctxt->input->cur[3]);
1501 }
1502 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001503 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001504 }
Owen Taylor3473f882001-02-23 17:55:21 +00001505
1506 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001507 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001508}
1509
1510/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001511 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001512 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001513 * @val: the char value
1514 *
1515 * append the char value in the array
1516 *
1517 * Returns the number of xmlChar written
1518 */
Owen Taylor3473f882001-02-23 17:55:21 +00001519int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001520xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001521 /*
1522 * We are supposed to handle UTF8, check it's valid
1523 * From rfc2044: encoding of the Unicode values on UTF-8:
1524 *
1525 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1526 * 0000 0000-0000 007F 0xxxxxxx
1527 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1528 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1529 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001530 if (val >= 0x80) {
1531 xmlChar *savedout = out;
1532 int bits;
1533 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1534 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1535 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1536 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001537 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001539 val);
1540 return(0);
1541 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001542 for ( ; bits >= 0; bits-= 6)
1543 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1544 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001545 }
1546 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001547 return 1;
1548}
1549
1550/**
1551 * xmlCopyChar:
1552 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001553 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001554 * @val: the char value
1555 *
1556 * append the char value in the array
1557 *
1558 * Returns the number of xmlChar written
1559 */
1560
1561int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001562xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001563 /* the len parameter is ignored */
1564 if (val >= 0x80) {
1565 return(xmlCopyCharMultiByte (out, val));
1566 }
1567 *out = (xmlChar) val;
1568 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001569}
1570
1571/************************************************************************
1572 * *
1573 * Commodity functions to switch encodings *
1574 * *
1575 ************************************************************************/
1576
1577/**
1578 * xmlSwitchEncoding:
1579 * @ctxt: the parser context
1580 * @enc: the encoding value (number)
1581 *
1582 * change the input functions when discovering the character encoding
1583 * of a given entity.
1584 *
1585 * Returns 0 in case of success, -1 otherwise
1586 */
1587int
1588xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1589{
1590 xmlCharEncodingHandlerPtr handler;
1591
1592 switch (enc) {
1593 case XML_CHAR_ENCODING_ERROR:
1594 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1596 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001599 break;
1600 case XML_CHAR_ENCODING_NONE:
1601 /* let's assume it's UTF-8 without the XML decl */
1602 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1603 return(0);
1604 case XML_CHAR_ENCODING_UTF8:
1605 /* default encoding, no conversion should be needed */
1606 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001607
1608 /*
1609 * Errata on XML-1.0 June 20 2001
1610 * Specific handling of the Byte Order Mark for
1611 * UTF-8
1612 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001613 if ((ctxt->input != NULL) &&
1614 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001615 (ctxt->input->cur[1] == 0xBB) &&
1616 (ctxt->input->cur[2] == 0xBF)) {
1617 ctxt->input->cur += 3;
1618 }
Owen Taylor3473f882001-02-23 17:55:21 +00001619 return(0);
1620 default:
1621 break;
1622 }
1623 handler = xmlGetCharEncodingHandler(enc);
1624 if (handler == NULL) {
1625 /*
1626 * Default handlers.
1627 */
1628 switch (enc) {
1629 case XML_CHAR_ENCODING_ERROR:
1630 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1632 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1633 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001634 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001635 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1636 break;
1637 case XML_CHAR_ENCODING_NONE:
1638 /* let's assume it's UTF-8 without the XML decl */
1639 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1640 return(0);
1641 case XML_CHAR_ENCODING_UTF8:
1642 case XML_CHAR_ENCODING_ASCII:
1643 /* default encoding, no conversion should be needed */
1644 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1645 return(0);
1646 case XML_CHAR_ENCODING_UTF16LE:
1647 break;
1648 case XML_CHAR_ENCODING_UTF16BE:
1649 break;
1650 case XML_CHAR_ENCODING_UCS4LE:
1651 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1653 ctxt->sax->error(ctxt->userData,
1654 "char encoding USC4 little endian not supported\n");
1655 break;
1656 case XML_CHAR_ENCODING_UCS4BE:
1657 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1659 ctxt->sax->error(ctxt->userData,
1660 "char encoding USC4 big endian not supported\n");
1661 break;
1662 case XML_CHAR_ENCODING_EBCDIC:
1663 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1665 ctxt->sax->error(ctxt->userData,
1666 "char encoding EBCDIC not supported\n");
1667 break;
1668 case XML_CHAR_ENCODING_UCS4_2143:
1669 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1671 ctxt->sax->error(ctxt->userData,
1672 "char encoding UCS4 2143 not supported\n");
1673 break;
1674 case XML_CHAR_ENCODING_UCS4_3412:
1675 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1677 ctxt->sax->error(ctxt->userData,
1678 "char encoding UCS4 3412 not supported\n");
1679 break;
1680 case XML_CHAR_ENCODING_UCS2:
1681 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1683 ctxt->sax->error(ctxt->userData,
1684 "char encoding UCS2 not supported\n");
1685 break;
1686 case XML_CHAR_ENCODING_8859_1:
1687 case XML_CHAR_ENCODING_8859_2:
1688 case XML_CHAR_ENCODING_8859_3:
1689 case XML_CHAR_ENCODING_8859_4:
1690 case XML_CHAR_ENCODING_8859_5:
1691 case XML_CHAR_ENCODING_8859_6:
1692 case XML_CHAR_ENCODING_8859_7:
1693 case XML_CHAR_ENCODING_8859_8:
1694 case XML_CHAR_ENCODING_8859_9:
1695 /*
1696 * We used to keep the internal content in the
1697 * document encoding however this turns being unmaintainable
1698 * So xmlGetCharEncodingHandler() will return non-null
1699 * values for this now.
1700 */
1701 if ((ctxt->inputNr == 1) &&
1702 (ctxt->encoding == NULL) &&
1703 (ctxt->input->encoding != NULL)) {
1704 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1705 }
1706 ctxt->charset = enc;
1707 return(0);
1708 case XML_CHAR_ENCODING_2022_JP:
1709 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1711 ctxt->sax->error(ctxt->userData,
1712 "char encoding ISO-2022-JPnot supported\n");
1713 break;
1714 case XML_CHAR_ENCODING_SHIFT_JIS:
1715 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1717 ctxt->sax->error(ctxt->userData,
1718 "char encoding Shift_JIS not supported\n");
1719 break;
1720 case XML_CHAR_ENCODING_EUC_JP:
1721 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "char encoding EUC-JPnot supported\n");
1725 break;
1726 }
1727 }
1728 if (handler == NULL)
1729 return(-1);
1730 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1731 return(xmlSwitchToEncoding(ctxt, handler));
1732}
1733
1734/**
1735 * xmlSwitchToEncoding:
1736 * @ctxt: the parser context
1737 * @handler: the encoding handler
1738 *
1739 * change the input functions when discovering the character encoding
1740 * of a given entity.
1741 *
1742 * Returns 0 in case of success, -1 otherwise
1743 */
1744int
1745xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1746{
1747 int nbchars;
1748
1749 if (handler != NULL) {
1750 if (ctxt->input != NULL) {
1751 if (ctxt->input->buf != NULL) {
1752 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001753 /*
1754 * Check in case the auto encoding detetection triggered
1755 * in already.
1756 */
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (ctxt->input->buf->encoder == handler)
1758 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001759
1760 /*
1761 * "UTF-16" can be used for both LE and BE
1762 */
1763 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1764 BAD_CAST "UTF-16", 6)) &&
1765 (!xmlStrncmp(BAD_CAST handler->name,
1766 BAD_CAST "UTF-16", 6))) {
1767 return(0);
1768 }
1769
Owen Taylor3473f882001-02-23 17:55:21 +00001770 /*
1771 * Note: this is a bit dangerous, but that's what it
1772 * takes to use nearly compatible signature for different
1773 * encodings.
1774 */
1775 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1776 ctxt->input->buf->encoder = handler;
1777 return(0);
1778 }
1779 ctxt->input->buf->encoder = handler;
1780
1781 /*
1782 * Is there already some content down the pipe to convert ?
1783 */
1784 if ((ctxt->input->buf->buffer != NULL) &&
1785 (ctxt->input->buf->buffer->use > 0)) {
1786 int processed;
1787
1788 /*
1789 * Specific handling of the Byte Order Mark for
1790 * UTF-16
1791 */
1792 if ((handler->name != NULL) &&
1793 (!strcmp(handler->name, "UTF-16LE")) &&
1794 (ctxt->input->cur[0] == 0xFF) &&
1795 (ctxt->input->cur[1] == 0xFE)) {
1796 ctxt->input->cur += 2;
1797 }
1798 if ((handler->name != NULL) &&
1799 (!strcmp(handler->name, "UTF-16BE")) &&
1800 (ctxt->input->cur[0] == 0xFE) &&
1801 (ctxt->input->cur[1] == 0xFF)) {
1802 ctxt->input->cur += 2;
1803 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001804 /*
1805 * Errata on XML-1.0 June 20 2001
1806 * Specific handling of the Byte Order Mark for
1807 * UTF-8
1808 */
1809 if ((handler->name != NULL) &&
1810 (!strcmp(handler->name, "UTF-8")) &&
1811 (ctxt->input->cur[0] == 0xEF) &&
1812 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001813 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001814 ctxt->input->cur += 3;
1815 }
Owen Taylor3473f882001-02-23 17:55:21 +00001816
1817 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001818 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001819 * Move it as the raw buffer and create a new input buffer
1820 */
1821 processed = ctxt->input->cur - ctxt->input->base;
1822 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1823 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1824 ctxt->input->buf->buffer = xmlBufferCreate();
1825
1826 if (ctxt->html) {
1827 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001828 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001829 */
1830 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1831 ctxt->input->buf->buffer,
1832 ctxt->input->buf->raw);
1833 } else {
1834 /*
1835 * convert just enough to get
1836 * '<?xml version="1.0" encoding="xxx"?>'
1837 * parsed with the autodetected encoding
1838 * into the parser reading buffer.
1839 */
1840 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1841 ctxt->input->buf->buffer,
1842 ctxt->input->buf->raw);
1843 }
1844 if (nbchars < 0) {
1845 xmlGenericError(xmlGenericErrorContext,
1846 "xmlSwitchToEncoding: encoder error\n");
1847 return(-1);
1848 }
1849 ctxt->input->base =
1850 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001851 ctxt->input->end =
1852 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001853
1854 }
1855 return(0);
1856 } else {
1857 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1858 /*
1859 * When parsing a static memory array one must know the
1860 * size to be able to convert the buffer.
1861 */
1862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1863 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001864 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001865 return(-1);
1866 } else {
1867 int processed;
1868
1869 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001870 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001871 * Move it as the raw buffer and create a new input buffer
1872 */
1873 processed = ctxt->input->cur - ctxt->input->base;
1874
1875 ctxt->input->buf->raw = xmlBufferCreate();
1876 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1877 ctxt->input->length - processed);
1878 ctxt->input->buf->buffer = xmlBufferCreate();
1879
1880 /*
1881 * convert as much as possible of the raw input
1882 * to the parser reading buffer.
1883 */
1884 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1885 ctxt->input->buf->buffer,
1886 ctxt->input->buf->raw);
1887 if (nbchars < 0) {
1888 xmlGenericError(xmlGenericErrorContext,
1889 "xmlSwitchToEncoding: encoder error\n");
1890 return(-1);
1891 }
1892
1893 /*
1894 * Conversion succeeded, get rid of the old buffer
1895 */
1896 if ((ctxt->input->free != NULL) &&
1897 (ctxt->input->base != NULL))
1898 ctxt->input->free((xmlChar *) ctxt->input->base);
1899 ctxt->input->base =
1900 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001901 ctxt->input->end =
1902 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001903 }
1904 }
1905 } else {
1906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1907 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001908 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001909 return(-1);
1910 }
1911 /*
1912 * The parsing is now done in UTF8 natively
1913 */
1914 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1915 } else
1916 return(-1);
1917 return(0);
1918
1919}
1920
1921/************************************************************************
1922 * *
1923 * Commodity functions to handle entities processing *
1924 * *
1925 ************************************************************************/
1926
1927/**
1928 * xmlFreeInputStream:
1929 * @input: an xmlParserInputPtr
1930 *
1931 * Free up an input stream.
1932 */
1933void
1934xmlFreeInputStream(xmlParserInputPtr input) {
1935 if (input == NULL) return;
1936
1937 if (input->filename != NULL) xmlFree((char *) input->filename);
1938 if (input->directory != NULL) xmlFree((char *) input->directory);
1939 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1940 if (input->version != NULL) xmlFree((char *) input->version);
1941 if ((input->free != NULL) && (input->base != NULL))
1942 input->free((xmlChar *) input->base);
1943 if (input->buf != NULL)
1944 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001945 xmlFree(input);
1946}
1947
1948/**
1949 * xmlNewInputStream:
1950 * @ctxt: an XML parser context
1951 *
1952 * Create a new input stream structure
1953 * Returns the new input stream or NULL
1954 */
1955xmlParserInputPtr
1956xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1957 xmlParserInputPtr input;
1958
1959 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1960 if (input == NULL) {
1961 if (ctxt != NULL) {
1962 ctxt->errNo = XML_ERR_NO_MEMORY;
1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt->userData,
1965 "malloc: couldn't allocate a new input stream\n");
1966 ctxt->errNo = XML_ERR_NO_MEMORY;
1967 }
1968 return(NULL);
1969 }
1970 memset(input, 0, sizeof(xmlParserInput));
1971 input->line = 1;
1972 input->col = 1;
1973 input->standalone = -1;
1974 return(input);
1975}
1976
1977/**
1978 * xmlNewIOInputStream:
1979 * @ctxt: an XML parser context
1980 * @input: an I/O Input
1981 * @enc: the charset encoding if known
1982 *
1983 * Create a new input stream structure encapsulating the @input into
1984 * a stream suitable for the parser.
1985 *
1986 * Returns the new input stream or NULL
1987 */
1988xmlParserInputPtr
1989xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1990 xmlCharEncoding enc) {
1991 xmlParserInputPtr inputStream;
1992
1993 if (xmlParserDebugEntities)
1994 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1995 inputStream = xmlNewInputStream(ctxt);
1996 if (inputStream == NULL) {
1997 return(NULL);
1998 }
1999 inputStream->filename = NULL;
2000 inputStream->buf = input;
2001 inputStream->base = inputStream->buf->buffer->content;
2002 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002003 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002004 if (enc != XML_CHAR_ENCODING_NONE) {
2005 xmlSwitchEncoding(ctxt, enc);
2006 }
2007
2008 return(inputStream);
2009}
2010
2011/**
2012 * xmlNewEntityInputStream:
2013 * @ctxt: an XML parser context
2014 * @entity: an Entity pointer
2015 *
2016 * Create a new input stream based on an xmlEntityPtr
2017 *
2018 * Returns the new input stream or NULL
2019 */
2020xmlParserInputPtr
2021xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2022 xmlParserInputPtr input;
2023
2024 if (entity == NULL) {
2025 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2027 ctxt->sax->error(ctxt->userData,
2028 "internal: xmlNewEntityInputStream entity = NULL\n");
2029 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2030 return(NULL);
2031 }
2032 if (xmlParserDebugEntities)
2033 xmlGenericError(xmlGenericErrorContext,
2034 "new input from entity: %s\n", entity->name);
2035 if (entity->content == NULL) {
2036 switch (entity->etype) {
2037 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2038 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2040 ctxt->sax->error(ctxt->userData,
2041 "xmlNewEntityInputStream unparsed entity !\n");
2042 break;
2043 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2044 case XML_EXTERNAL_PARAMETER_ENTITY:
2045 return(xmlLoadExternalEntity((char *) entity->URI,
2046 (char *) entity->ExternalID, ctxt));
2047 case XML_INTERNAL_GENERAL_ENTITY:
2048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2049 ctxt->sax->error(ctxt->userData,
2050 "Internal entity %s without content !\n", entity->name);
2051 break;
2052 case XML_INTERNAL_PARAMETER_ENTITY:
2053 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2055 ctxt->sax->error(ctxt->userData,
2056 "Internal parameter entity %s without content !\n", entity->name);
2057 break;
2058 case XML_INTERNAL_PREDEFINED_ENTITY:
2059 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2061 ctxt->sax->error(ctxt->userData,
2062 "Predefined entity %s without content !\n", entity->name);
2063 break;
2064 }
2065 return(NULL);
2066 }
2067 input = xmlNewInputStream(ctxt);
2068 if (input == NULL) {
2069 return(NULL);
2070 }
2071 input->filename = (char *) entity->URI;
2072 input->base = entity->content;
2073 input->cur = entity->content;
2074 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002075 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002076 return(input);
2077}
2078
2079/**
2080 * xmlNewStringInputStream:
2081 * @ctxt: an XML parser context
2082 * @buffer: an memory buffer
2083 *
2084 * Create a new input stream based on a memory buffer.
2085 * Returns the new input stream
2086 */
2087xmlParserInputPtr
2088xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2089 xmlParserInputPtr input;
2090
2091 if (buffer == NULL) {
2092 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2094 ctxt->sax->error(ctxt->userData,
2095 "internal: xmlNewStringInputStream string = NULL\n");
2096 return(NULL);
2097 }
2098 if (xmlParserDebugEntities)
2099 xmlGenericError(xmlGenericErrorContext,
2100 "new fixed input: %.30s\n", buffer);
2101 input = xmlNewInputStream(ctxt);
2102 if (input == NULL) {
2103 return(NULL);
2104 }
2105 input->base = buffer;
2106 input->cur = buffer;
2107 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002108 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002109 return(input);
2110}
2111
2112/**
2113 * xmlNewInputFromFile:
2114 * @ctxt: an XML parser context
2115 * @filename: the filename to use as entity
2116 *
2117 * Create a new input stream based on a file.
2118 *
2119 * Returns the new input stream or NULL in case of error
2120 */
2121xmlParserInputPtr
2122xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2123 xmlParserInputBufferPtr buf;
2124 xmlParserInputPtr inputStream;
2125 char *directory = NULL;
2126 xmlChar *URI = NULL;
2127
2128 if (xmlParserDebugEntities)
2129 xmlGenericError(xmlGenericErrorContext,
2130 "new input from file: %s\n", filename);
2131 if (ctxt == NULL) return(NULL);
2132 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2133 if (buf == NULL)
2134 return(NULL);
2135
2136 URI = xmlStrdup((xmlChar *) filename);
2137 directory = xmlParserGetDirectory((const char *) URI);
2138
2139 inputStream = xmlNewInputStream(ctxt);
2140 if (inputStream == NULL) {
2141 if (directory != NULL) xmlFree((char *) directory);
2142 if (URI != NULL) xmlFree((char *) URI);
2143 return(NULL);
2144 }
2145
2146 inputStream->filename = (const char *) URI;
2147 inputStream->directory = directory;
2148 inputStream->buf = buf;
2149
2150 inputStream->base = inputStream->buf->buffer->content;
2151 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002152 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002153 if ((ctxt->directory == NULL) && (directory != NULL))
2154 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2155 return(inputStream);
2156}
2157
2158/************************************************************************
2159 * *
2160 * Commodity functions to handle parser contexts *
2161 * *
2162 ************************************************************************/
2163
2164/**
2165 * xmlInitParserCtxt:
2166 * @ctxt: an XML parser context
2167 *
2168 * Initialize a parser context
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002169 *
2170 * Returns 0 in case of success and -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00002171 */
2172
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002173int
Owen Taylor3473f882001-02-23 17:55:21 +00002174xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2175{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002176 if(ctxt==NULL) {
2177 xmlGenericError(xmlGenericErrorContext,
2178 "xmlInitParserCtxt: NULL context given\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002179 return(-1);
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002180 }
2181
Owen Taylor3473f882001-02-23 17:55:21 +00002182 xmlDefaultSAXHandlerInit();
2183
William M. Brack8b2c7f12002-11-22 05:07:29 +00002184 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2185 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002186 xmlGenericError(xmlGenericErrorContext,
2187 "xmlInitParserCtxt: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002188 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002189 }
2190 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002191 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002192
2193 /* Allocate the Input stack */
2194 ctxt->inputTab = (xmlParserInputPtr *)
2195 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2196 if (ctxt->inputTab == NULL) {
2197 xmlGenericError(xmlGenericErrorContext,
2198 "xmlInitParserCtxt: out of memory\n");
2199 ctxt->inputNr = 0;
2200 ctxt->inputMax = 0;
2201 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002202 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002203 }
2204 ctxt->inputNr = 0;
2205 ctxt->inputMax = 5;
2206 ctxt->input = NULL;
2207
2208 ctxt->version = NULL;
2209 ctxt->encoding = NULL;
2210 ctxt->standalone = -1;
2211 ctxt->hasExternalSubset = 0;
2212 ctxt->hasPErefs = 0;
2213 ctxt->html = 0;
2214 ctxt->external = 0;
2215 ctxt->instate = XML_PARSER_START;
2216 ctxt->token = 0;
2217 ctxt->directory = NULL;
2218
2219 /* Allocate the Node stack */
2220 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2221 if (ctxt->nodeTab == NULL) {
2222 xmlGenericError(xmlGenericErrorContext,
2223 "xmlInitParserCtxt: out of memory\n");
2224 ctxt->nodeNr = 0;
2225 ctxt->nodeMax = 0;
2226 ctxt->node = NULL;
2227 ctxt->inputNr = 0;
2228 ctxt->inputMax = 0;
2229 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002230 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 ctxt->nodeNr = 0;
2233 ctxt->nodeMax = 10;
2234 ctxt->node = NULL;
2235
2236 /* Allocate the Name stack */
2237 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2238 if (ctxt->nameTab == NULL) {
2239 xmlGenericError(xmlGenericErrorContext,
2240 "xmlInitParserCtxt: out of memory\n");
2241 ctxt->nodeNr = 0;
2242 ctxt->nodeMax = 0;
2243 ctxt->node = NULL;
2244 ctxt->inputNr = 0;
2245 ctxt->inputMax = 0;
2246 ctxt->input = NULL;
2247 ctxt->nameNr = 0;
2248 ctxt->nameMax = 0;
2249 ctxt->name = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002250 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002251 }
2252 ctxt->nameNr = 0;
2253 ctxt->nameMax = 10;
2254 ctxt->name = NULL;
2255
2256 /* Allocate the space stack */
2257 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2258 if (ctxt->spaceTab == NULL) {
2259 xmlGenericError(xmlGenericErrorContext,
2260 "xmlInitParserCtxt: out of memory\n");
2261 ctxt->nodeNr = 0;
2262 ctxt->nodeMax = 0;
2263 ctxt->node = NULL;
2264 ctxt->inputNr = 0;
2265 ctxt->inputMax = 0;
2266 ctxt->input = NULL;
2267 ctxt->nameNr = 0;
2268 ctxt->nameMax = 0;
2269 ctxt->name = NULL;
2270 ctxt->spaceNr = 0;
2271 ctxt->spaceMax = 0;
2272 ctxt->space = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002273 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002274 }
2275 ctxt->spaceNr = 1;
2276 ctxt->spaceMax = 10;
2277 ctxt->spaceTab[0] = -1;
2278 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002279 ctxt->userData = ctxt;
2280 ctxt->myDoc = NULL;
2281 ctxt->wellFormed = 1;
2282 ctxt->valid = 1;
2283 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2284 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2285 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002286 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002287 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002288 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002289 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002290
Owen Taylor3473f882001-02-23 17:55:21 +00002291 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002292 ctxt->vctxt.error = xmlParserValidityError;
2293 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002294 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002295 if (xmlGetWarningsDefaultValue == 0)
2296 ctxt->vctxt.warning = NULL;
2297 else
2298 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002299 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002300 }
2301 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2302 ctxt->record_info = 0;
2303 ctxt->nbChars = 0;
2304 ctxt->checkIndex = 0;
2305 ctxt->inSubset = 0;
2306 ctxt->errNo = XML_ERR_OK;
2307 ctxt->depth = 0;
2308 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002309 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002310 xmlInitNodeInfoSeq(&ctxt->node_seq);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002311 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002312}
2313
2314/**
2315 * xmlFreeParserCtxt:
2316 * @ctxt: an XML parser context
2317 *
2318 * Free all the memory used by a parser context. However the parsed
2319 * document in ctxt->myDoc is not freed.
2320 */
2321
2322void
2323xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2324{
2325 xmlParserInputPtr input;
2326 xmlChar *oldname;
2327
2328 if (ctxt == NULL) return;
2329
2330 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2331 xmlFreeInputStream(input);
2332 }
2333 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2334 xmlFree(oldname);
2335 }
2336 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2337 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2338 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2339 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2340 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2341 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2342 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2343 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2344 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2346 xmlFree(ctxt->sax);
2347 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002348 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002349#ifdef LIBXML_CATALOG_ENABLED
2350 if (ctxt->catalogs != NULL)
2351 xmlCatalogFreeLocal(ctxt->catalogs);
2352#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002353 xmlFree(ctxt);
2354}
2355
2356/**
2357 * xmlNewParserCtxt:
2358 *
2359 * Allocate and initialize a new parser context.
2360 *
2361 * Returns the xmlParserCtxtPtr or NULL
2362 */
2363
2364xmlParserCtxtPtr
2365xmlNewParserCtxt()
2366{
2367 xmlParserCtxtPtr ctxt;
2368
2369 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2370 if (ctxt == NULL) {
2371 xmlGenericError(xmlGenericErrorContext,
2372 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002373 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002374 return(NULL);
2375 }
2376 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002377 if (xmlInitParserCtxt(ctxt) < 0) {
2378 xmlFreeParserCtxt(ctxt);
2379 return(NULL);
2380 }
Owen Taylor3473f882001-02-23 17:55:21 +00002381 return(ctxt);
2382}
2383
2384/************************************************************************
2385 * *
2386 * Handling of node informations *
2387 * *
2388 ************************************************************************/
2389
2390/**
2391 * xmlClearParserCtxt:
2392 * @ctxt: an XML parser context
2393 *
2394 * Clear (release owned resources) and reinitialize a parser context
2395 */
2396
2397void
2398xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2399{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002400 if (ctxt==NULL)
2401 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002402 xmlClearNodeInfoSeq(&ctxt->node_seq);
2403 xmlInitParserCtxt(ctxt);
2404}
2405
2406/**
2407 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002408 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002409 * @node: an XML node within the tree
2410 *
2411 * Find the parser node info struct for a given node
2412 *
2413 * Returns an xmlParserNodeInfo block pointer or NULL
2414 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002415const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2416 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002417{
2418 unsigned long pos;
2419
2420 /* Find position where node should be at */
2421 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002422 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002423 return &ctx->node_seq.buffer[pos];
2424 else
2425 return NULL;
2426}
2427
2428
2429/**
2430 * xmlInitNodeInfoSeq:
2431 * @seq: a node info sequence pointer
2432 *
2433 * -- Initialize (set to initial state) node info sequence
2434 */
2435void
2436xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2437{
2438 seq->length = 0;
2439 seq->maximum = 0;
2440 seq->buffer = NULL;
2441}
2442
2443/**
2444 * xmlClearNodeInfoSeq:
2445 * @seq: a node info sequence pointer
2446 *
2447 * -- Clear (release memory and reinitialize) node
2448 * info sequence
2449 */
2450void
2451xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2452{
2453 if ( seq->buffer != NULL )
2454 xmlFree(seq->buffer);
2455 xmlInitNodeInfoSeq(seq);
2456}
2457
2458
2459/**
2460 * xmlParserFindNodeInfoIndex:
2461 * @seq: a node info sequence pointer
2462 * @node: an XML node pointer
2463 *
2464 *
2465 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2466 * the given node is or should be at in a sorted sequence
2467 *
2468 * Returns a long indicating the position of the record
2469 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002470unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2471 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002472{
2473 unsigned long upper, lower, middle;
2474 int found = 0;
2475
2476 /* Do a binary search for the key */
2477 lower = 1;
2478 upper = seq->length;
2479 middle = 0;
2480 while ( lower <= upper && !found) {
2481 middle = lower + (upper - lower) / 2;
2482 if ( node == seq->buffer[middle - 1].node )
2483 found = 1;
2484 else if ( node < seq->buffer[middle - 1].node )
2485 upper = middle - 1;
2486 else
2487 lower = middle + 1;
2488 }
2489
2490 /* Return position */
2491 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2492 return middle;
2493 else
2494 return middle - 1;
2495}
2496
2497
2498/**
2499 * xmlParserAddNodeInfo:
2500 * @ctxt: an XML parser context
2501 * @info: a node info sequence pointer
2502 *
2503 * Insert node info record into the sorted sequence
2504 */
2505void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002506xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002507 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002508{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002509 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002510
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002511 /* Find pos and check to see if node is already in the sequence */
2512 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2513 info->node);
2514 if (pos < ctxt->node_seq.length
2515 && ctxt->node_seq.buffer[pos].node == info->node) {
2516 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002517 }
2518
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002519 /* Otherwise, we need to add new node to buffer */
2520 else {
2521 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2522 xmlParserNodeInfo *tmp_buffer;
2523 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002524
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002525 if (ctxt->node_seq.maximum == 0)
2526 ctxt->node_seq.maximum = 2;
2527 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2528 (2 * ctxt->node_seq.maximum));
2529
2530 if (ctxt->node_seq.buffer == NULL)
Daniel Veillardc4f65ab2003-04-21 23:07:45 +00002531 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002532 else
2533 tmp_buffer =
2534 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2535 byte_size);
2536
2537 if (tmp_buffer == NULL) {
2538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2539 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2540 ctxt->errNo = XML_ERR_NO_MEMORY;
2541 return;
2542 }
2543 ctxt->node_seq.buffer = tmp_buffer;
2544 ctxt->node_seq.maximum *= 2;
2545 }
2546
2547 /* If position is not at end, move elements out of the way */
2548 if (pos != ctxt->node_seq.length) {
2549 unsigned long i;
2550
2551 for (i = ctxt->node_seq.length; i > pos; i--)
2552 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2553 }
2554
2555 /* Copy element and increase length */
2556 ctxt->node_seq.buffer[pos] = *info;
2557 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
Owen Taylor3473f882001-02-23 17:55:21 +00002559}
2560
2561/************************************************************************
2562 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002563 * Defaults settings *
2564 * *
2565 ************************************************************************/
2566/**
2567 * xmlPedanticParserDefault:
2568 * @val: int 0 or 1
2569 *
2570 * Set and return the previous value for enabling pedantic warnings.
2571 *
2572 * Returns the last value for 0 for no substitution, 1 for substitution.
2573 */
2574
2575int
2576xmlPedanticParserDefault(int val) {
2577 int old = xmlPedanticParserDefaultValue;
2578
2579 xmlPedanticParserDefaultValue = val;
2580 return(old);
2581}
2582
2583/**
2584 * xmlLineNumbersDefault:
2585 * @val: int 0 or 1
2586 *
2587 * Set and return the previous value for enabling line numbers in elements
2588 * contents. This may break on old application and is turned off by default.
2589 *
2590 * Returns the last value for 0 for no substitution, 1 for substitution.
2591 */
2592
2593int
2594xmlLineNumbersDefault(int val) {
2595 int old = xmlLineNumbersDefaultValue;
2596
2597 xmlLineNumbersDefaultValue = val;
2598 return(old);
2599}
2600
2601/**
2602 * xmlSubstituteEntitiesDefault:
2603 * @val: int 0 or 1
2604 *
2605 * Set and return the previous value for default entity support.
2606 * Initially the parser always keep entity references instead of substituting
2607 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002608 * default parser behavior
2609 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002610 * file basis.
2611 *
2612 * Returns the last value for 0 for no substitution, 1 for substitution.
2613 */
2614
2615int
2616xmlSubstituteEntitiesDefault(int val) {
2617 int old = xmlSubstituteEntitiesDefaultValue;
2618
2619 xmlSubstituteEntitiesDefaultValue = val;
2620 return(old);
2621}
2622
2623/**
2624 * xmlKeepBlanksDefault:
2625 * @val: int 0 or 1
2626 *
2627 * Set and return the previous value for default blanks text nodes support.
2628 * The 1.x version of the parser used an heuristic to try to detect
2629 * ignorable white spaces. As a result the SAX callback was generating
2630 * ignorableWhitespace() callbacks instead of characters() one, and when
2631 * using the DOM output text nodes containing those blanks were not generated.
2632 * The 2.x and later version will switch to the XML standard way and
2633 * ignorableWhitespace() are only generated when running the parser in
2634 * validating mode and when the current element doesn't allow CDATA or
2635 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002636 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002637 * on 1.X libs and to switch back to the old mode for compatibility when
2638 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2639 * by using xmlIsBlankNode() commodity function to detect the "empty"
2640 * nodes generated.
2641 * This value also affect autogeneration of indentation when saving code
2642 * if blanks sections are kept, indentation is not generated.
2643 *
2644 * Returns the last value for 0 for no substitution, 1 for substitution.
2645 */
2646
2647int
2648xmlKeepBlanksDefault(int val) {
2649 int old = xmlKeepBlanksDefaultValue;
2650
2651 xmlKeepBlanksDefaultValue = val;
2652 xmlIndentTreeOutput = !val;
2653 return(old);
2654}
2655
2656/************************************************************************
2657 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * Deprecated functions kept for compatibility *
2659 * *
2660 ************************************************************************/
2661
/*
 * xmlCheckLanguageIDLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in the range [a-z] or [A-Z], 0 otherwise.
 */
static int
xmlCheckLanguageIDLetter(int c) {
    return((((c >= 'A') && (c <= 'Z')) ||
            ((c >= 'a') && (c <= 'z'))) ? 1 : 0);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * A NULL or empty @lang is rejected. As required by the '+' in
 * productions [36], [37] and [38], at least one letter must follow
 * every '-'.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the prefix must be followed by
         * one or more letters.
         */
        cur += 2;
        if (!xmlCheckLanguageIDLetter(cur[0]))
            return(0);
        while (xmlCheckLanguageIDLetter(cur[0])) /* non input consuming */
            cur++;
    } else if (xmlCheckLanguageIDLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        if (!xmlCheckLanguageIDLetter(cur[1]))
            return(0);
        cur += 2;
    } else
        return(0);

    /*
     * Any number of Subcodes, each one introduced by '-' and made of
     * one or more letters.
     */
    while (cur[0] != 0) { /* non input consuming */
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlCheckLanguageIDLetter(cur[0]))
            return(0);
        while (xmlCheckLanguageIDLetter(cur[0])) /* non input consuming */
            cur++;
    }
    return(1);
}
2732
2733/**
2734 * xmlDecodeEntities:
2735 * @ctxt: the parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002736 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002737 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
Owen Taylor3473f882001-02-23 17:55:21 +00002738 * @end: an end marker xmlChar, 0 if none
2739 * @end2: an end marker xmlChar, 0 if none
2740 * @end3: an end marker xmlChar, 0 if none
2741 *
2742 * This function is deprecated, we now always process entities content
2743 * through xmlStringDecodeEntities
2744 *
2745 * TODO: remove it in next major release.
2746 *
2747 * [67] Reference ::= EntityRef | CharRef
2748 *
2749 * [69] PEReference ::= '%' Name ';'
2750 *
2751 * Returns A newly allocated string with the substitution done. The caller
2752 * must deallocate it !
2753 */
2754xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002755xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2756 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002757#if 0
2758 xmlChar *buffer = NULL;
2759 unsigned int buffer_size = 0;
2760 unsigned int nbchars = 0;
2761
2762 xmlChar *current = NULL;
2763 xmlEntityPtr ent;
2764 unsigned int max = (unsigned int) len;
2765 int c,l;
2766#endif
2767
2768 static int deprecated = 0;
2769 if (!deprecated) {
2770 xmlGenericError(xmlGenericErrorContext,
2771 "xmlDecodeEntities() deprecated function reached\n");
2772 deprecated = 1;
2773 }
2774
2775#if 0
2776 if (ctxt->depth > 40) {
2777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2778 ctxt->sax->error(ctxt->userData,
2779 "Detected entity reference loop\n");
2780 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002781 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002782 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2783 return(NULL);
2784 }
2785
2786 /*
2787 * allocate a translation buffer.
2788 */
2789 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2790 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2791 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002792 xmlGenericError(xmlGenericErrorContext,
2793 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002794 return(NULL);
2795 }
2796
2797 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002798 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002799 */
2800 GROW;
2801 c = CUR_CHAR(l);
2802 while ((nbchars < max) && (c != end) && /* NOTUSED */
2803 (c != end2) && (c != end3)) {
2804 GROW;
2805 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002806 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002807 int val = xmlParseCharRef(ctxt);
2808 COPY_BUF(0,buffer,nbchars,val);
2809 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002810 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002811 (what & XML_SUBSTITUTE_REF)) {
2812 if (xmlParserDebugEntities)
2813 xmlGenericError(xmlGenericErrorContext,
2814 "decoding Entity Reference\n");
2815 ent = xmlParseEntityRef(ctxt);
2816 if ((ent != NULL) &&
2817 (ctxt->replaceEntities != 0)) {
2818 current = ent->content;
2819 while (*current != 0) { /* non input consuming loop */
2820 buffer[nbchars++] = *current++;
2821 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2822 growBuffer(buffer);
2823 }
2824 }
2825 } else if (ent != NULL) {
2826 const xmlChar *cur = ent->name;
2827
2828 buffer[nbchars++] = '&';
2829 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2830 growBuffer(buffer);
2831 }
2832 while (*cur != 0) { /* non input consuming loop */
2833 buffer[nbchars++] = *cur++;
2834 }
2835 buffer[nbchars++] = ';';
2836 }
2837 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2838 /*
2839 * a PEReference induce to switch the entity flow,
2840 * we break here to flush the current set of chars
2841 * parsed if any. We will be called back later.
2842 */
2843 if (xmlParserDebugEntities)
2844 xmlGenericError(xmlGenericErrorContext,
2845 "decoding PE Reference\n");
2846 if (nbchars != 0) break;
2847
2848 xmlParsePEReference(ctxt);
2849
2850 /*
2851 * Pop-up of finished entities.
2852 */
2853 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2854 xmlPopInput(ctxt);
2855
2856 break;
2857 } else {
2858 COPY_BUF(l,buffer,nbchars,c);
2859 NEXTL(l);
2860 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2861 growBuffer(buffer);
2862 }
2863 }
2864 c = CUR_CHAR(l);
2865 }
2866 buffer[nbchars++] = 0;
2867 return(buffer);
2868#endif
2869 return(NULL);
2870}
2871
2872/**
2873 * xmlNamespaceParseNCName:
2874 * @ctxt: an XML parser context
2875 *
2876 * parse an XML namespace name.
2877 *
2878 * TODO: this seems not in use anymore, the namespace handling is done on
2879 * top of the SAX interfaces, i.e. not on raw input.
2880 *
2881 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2882 *
2883 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2884 * CombiningChar | Extender
2885 *
2886 * Returns the namespace name or NULL
2887 */
2888
2889xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002890xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002891#if 0
2892 xmlChar buf[XML_MAX_NAMELEN + 5];
2893 int len = 0, l;
2894 int cur = CUR_CHAR(l);
2895#endif
2896
2897 static int deprecated = 0;
2898 if (!deprecated) {
2899 xmlGenericError(xmlGenericErrorContext,
2900 "xmlNamespaceParseNCName() deprecated function reached\n");
2901 deprecated = 1;
2902 }
2903
2904#if 0
2905 /* load first the value of the char !!! */
2906 GROW;
2907 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2908
2909xmlGenericError(xmlGenericErrorContext,
2910 "xmlNamespaceParseNCName: reached loop 3\n");
2911 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2912 (cur == '.') || (cur == '-') ||
2913 (cur == '_') ||
2914 (IS_COMBINING(cur)) ||
2915 (IS_EXTENDER(cur))) {
2916 COPY_BUF(l,buf,len,cur);
2917 NEXTL(l);
2918 cur = CUR_CHAR(l);
2919 if (len >= XML_MAX_NAMELEN) {
2920 xmlGenericError(xmlGenericErrorContext,
2921 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2922 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2923 (cur == '.') || (cur == '-') ||
2924 (cur == '_') ||
2925 (IS_COMBINING(cur)) ||
2926 (IS_EXTENDER(cur))) {
2927 NEXTL(l);
2928 cur = CUR_CHAR(l);
2929 }
2930 break;
2931 }
2932 }
2933 return(xmlStrndup(buf, len));
2934#endif
2935 return(NULL);
2936}
2937
2938/**
2939 * xmlNamespaceParseQName:
2940 * @ctxt: an XML parser context
2941 * @prefix: a xmlChar **
2942 *
2943 * TODO: this seems not in use anymore, the namespace handling is done on
2944 * top of the SAX interfaces, i.e. not on raw input.
2945 *
2946 * parse an XML qualified name
2947 *
2948 * [NS 5] QName ::= (Prefix ':')? LocalPart
2949 *
2950 * [NS 6] Prefix ::= NCName
2951 *
2952 * [NS 7] LocalPart ::= NCName
2953 *
2954 * Returns the local part, and prefix is updated
2955 * to get the Prefix if any.
2956 */
2957
2958xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002959xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002960
2961 static int deprecated = 0;
2962 if (!deprecated) {
2963 xmlGenericError(xmlGenericErrorContext,
2964 "xmlNamespaceParseQName() deprecated function reached\n");
2965 deprecated = 1;
2966 }
2967
2968#if 0
2969 xmlChar *ret = NULL;
2970
2971 *prefix = NULL;
2972 ret = xmlNamespaceParseNCName(ctxt);
2973 if (RAW == ':') {
2974 *prefix = ret;
2975 NEXT;
2976 ret = xmlNamespaceParseNCName(ctxt);
2977 }
2978
2979 return(ret);
2980#endif
2981 return(NULL);
2982}
2983
2984/**
2985 * xmlNamespaceParseNSDef:
2986 * @ctxt: an XML parser context
2987 *
2988 * parse a namespace prefix declaration
2989 *
2990 * TODO: this seems not in use anymore, the namespace handling is done on
2991 * top of the SAX interfaces, i.e. not on raw input.
2992 *
2993 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2994 *
2995 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2996 *
2997 * Returns the namespace name
2998 */
2999
3000xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003001xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003002 static int deprecated = 0;
3003 if (!deprecated) {
3004 xmlGenericError(xmlGenericErrorContext,
3005 "xmlNamespaceParseNSDef() deprecated function reached\n");
3006 deprecated = 1;
3007 }
3008 return(NULL);
3009#if 0
3010 xmlChar *name = NULL;
3011
3012 if ((RAW == 'x') && (NXT(1) == 'm') &&
3013 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3014 (NXT(4) == 's')) {
3015 SKIP(5);
3016 if (RAW == ':') {
3017 NEXT;
3018 name = xmlNamespaceParseNCName(ctxt);
3019 }
3020 }
3021 return(name);
3022#endif
3023}
3024
3025/**
3026 * xmlParseQuotedString:
3027 * @ctxt: an XML parser context
3028 *
3029 * Parse and return a string between quotes or doublequotes
3030 *
3031 * TODO: Deprecated, to be removed at next drop of binary compatibility
3032 *
3033 * Returns the string parser or NULL.
3034 */
3035xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003036xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003037 static int deprecated = 0;
3038 if (!deprecated) {
3039 xmlGenericError(xmlGenericErrorContext,
3040 "xmlParseQuotedString() deprecated function reached\n");
3041 deprecated = 1;
3042 }
3043 return(NULL);
3044
3045#if 0
3046 xmlChar *buf = NULL;
3047 int len = 0,l;
3048 int size = XML_PARSER_BUFFER_SIZE;
3049 int c;
3050
3051 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3052 if (buf == NULL) {
3053 xmlGenericError(xmlGenericErrorContext,
3054 "malloc of %d byte failed\n", size);
3055 return(NULL);
3056 }
3057xmlGenericError(xmlGenericErrorContext,
3058 "xmlParseQuotedString: reached loop 4\n");
3059 if (RAW == '"') {
3060 NEXT;
3061 c = CUR_CHAR(l);
3062 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3063 if (len + 5 >= size) {
3064 size *= 2;
3065 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3066 if (buf == NULL) {
3067 xmlGenericError(xmlGenericErrorContext,
3068 "realloc of %d byte failed\n", size);
3069 return(NULL);
3070 }
3071 }
3072 COPY_BUF(l,buf,len,c);
3073 NEXTL(l);
3074 c = CUR_CHAR(l);
3075 }
3076 if (c != '"') {
3077 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "String not closed \"%.50s\"\n", buf);
3081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003083 } else {
3084 NEXT;
3085 }
3086 } else if (RAW == '\''){
3087 NEXT;
3088 c = CUR;
3089 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3090 if (len + 1 >= size) {
3091 size *= 2;
3092 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3093 if (buf == NULL) {
3094 xmlGenericError(xmlGenericErrorContext,
3095 "realloc of %d byte failed\n", size);
3096 return(NULL);
3097 }
3098 }
3099 buf[len++] = c;
3100 NEXT;
3101 c = CUR;
3102 }
3103 if (RAW != '\'') {
3104 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3106 ctxt->sax->error(ctxt->userData,
3107 "String not closed \"%.50s\"\n", buf);
3108 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003109 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003110 } else {
3111 NEXT;
3112 }
3113 }
3114 return(buf);
3115#endif
3116}
3117
3118/**
3119 * xmlParseNamespace:
3120 * @ctxt: an XML parser context
3121 *
3122 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3123 *
3124 * This is what the older xml-name Working Draft specified, a bunch of
3125 * other stuff may still rely on it, so support is still here as
3126 * if it was declared on the root of the Tree:-(
3127 *
3128 * TODO: remove from library
3129 *
3130 * To be removed at next drop of binary compatibility
3131 */
3132
3133void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003134xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003135 static int deprecated = 0;
3136 if (!deprecated) {
3137 xmlGenericError(xmlGenericErrorContext,
3138 "xmlParseNamespace() deprecated function reached\n");
3139 deprecated = 1;
3140 }
3141
3142#if 0
3143 xmlChar *href = NULL;
3144 xmlChar *prefix = NULL;
3145 int garbage = 0;
3146
3147 /*
3148 * We just skipped "namespace" or "xml:namespace"
3149 */
3150 SKIP_BLANKS;
3151
3152xmlGenericError(xmlGenericErrorContext,
3153 "xmlParseNamespace: reached loop 5\n");
3154 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3155 /*
3156 * We can have "ns" or "prefix" attributes
3157 * Old encoding as 'href' or 'AS' attributes is still supported
3158 */
3159 if ((RAW == 'n') && (NXT(1) == 's')) {
3160 garbage = 0;
3161 SKIP(2);
3162 SKIP_BLANKS;
3163
3164 if (RAW != '=') continue;
3165 NEXT;
3166 SKIP_BLANKS;
3167
3168 href = xmlParseQuotedString(ctxt);
3169 SKIP_BLANKS;
3170 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3171 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3172 garbage = 0;
3173 SKIP(4);
3174 SKIP_BLANKS;
3175
3176 if (RAW != '=') continue;
3177 NEXT;
3178 SKIP_BLANKS;
3179
3180 href = xmlParseQuotedString(ctxt);
3181 SKIP_BLANKS;
3182 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3183 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3184 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3185 garbage = 0;
3186 SKIP(6);
3187 SKIP_BLANKS;
3188
3189 if (RAW != '=') continue;
3190 NEXT;
3191 SKIP_BLANKS;
3192
3193 prefix = xmlParseQuotedString(ctxt);
3194 SKIP_BLANKS;
3195 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3196 garbage = 0;
3197 SKIP(2);
3198 SKIP_BLANKS;
3199
3200 if (RAW != '=') continue;
3201 NEXT;
3202 SKIP_BLANKS;
3203
3204 prefix = xmlParseQuotedString(ctxt);
3205 SKIP_BLANKS;
3206 } else if ((RAW == '?') && (NXT(1) == '>')) {
3207 garbage = 0;
3208 NEXT;
3209 } else {
3210 /*
3211 * Found garbage when parsing the namespace
3212 */
3213 if (!garbage) {
3214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3215 ctxt->sax->error(ctxt->userData,
3216 "xmlParseNamespace found garbage\n");
3217 }
3218 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3219 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003220 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003221 NEXT;
3222 }
3223 }
3224
3225 MOVETO_ENDTAG(CUR_PTR);
3226 NEXT;
3227
3228 /*
3229 * Register the DTD.
3230 if (href != NULL)
3231 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3232 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3233 */
3234
3235 if (prefix != NULL) xmlFree(prefix);
3236 if (href != NULL) xmlFree(href);
3237#endif
3238}
3239
3240/**
3241 * xmlScanName:
3242 * @ctxt: an XML parser context
3243 *
3244 * Trickery: parse an XML name but without consuming the input flow
3245 * Needed for rollback cases. Used only when parsing entities references.
3246 *
3247 * TODO: seems deprecated now, only used in the default part of
3248 * xmlParserHandleReference
3249 *
3250 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3251 * CombiningChar | Extender
3252 *
3253 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3254 *
3255 * [6] Names ::= Name (S Name)*
3256 *
3257 * Returns the Name parsed or NULL
3258 */
3259
3260xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003261xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003262 static int deprecated = 0;
3263 if (!deprecated) {
3264 xmlGenericError(xmlGenericErrorContext,
3265 "xmlScanName() deprecated function reached\n");
3266 deprecated = 1;
3267 }
3268 return(NULL);
3269
3270#if 0
3271 xmlChar buf[XML_MAX_NAMELEN];
3272 int len = 0;
3273
3274 GROW;
3275 if (!IS_LETTER(RAW) && (RAW != '_') &&
3276 (RAW != ':')) {
3277 return(NULL);
3278 }
3279
3280
3281 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3282 (NXT(len) == '.') || (NXT(len) == '-') ||
3283 (NXT(len) == '_') || (NXT(len) == ':') ||
3284 (IS_COMBINING(NXT(len))) ||
3285 (IS_EXTENDER(NXT(len)))) {
3286 GROW;
3287 buf[len] = NXT(len);
3288 len++;
3289 if (len >= XML_MAX_NAMELEN) {
3290 xmlGenericError(xmlGenericErrorContext,
3291 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3292 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3293 (IS_DIGIT(NXT(len))) ||
3294 (NXT(len) == '.') || (NXT(len) == '-') ||
3295 (NXT(len) == '_') || (NXT(len) == ':') ||
3296 (IS_COMBINING(NXT(len))) ||
3297 (IS_EXTENDER(NXT(len))))
3298 len++;
3299 break;
3300 }
3301 }
3302 return(xmlStrndup(buf, len));
3303#endif
3304}
3305
3306/**
3307 * xmlParserHandleReference:
3308 * @ctxt: the parser context
3309 *
3310 * TODO: Remove, now deprecated ... the test is done directly in the
3311 * content parsing
3312 * routines.
3313 *
3314 * [67] Reference ::= EntityRef | CharRef
3315 *
3316 * [68] EntityRef ::= '&' Name ';'
3317 *
3318 * [ WFC: Entity Declared ]
3319 * the Name given in the entity reference must match that in an entity
3320 * declaration, except that well-formed documents need not declare any
3321 * of the following entities: amp, lt, gt, apos, quot.
3322 *
3323 * [ WFC: Parsed Entity ]
3324 * An entity reference must not contain the name of an unparsed entity
3325 *
3326 * [66] CharRef ::= '&#' [0-9]+ ';' |
3327 * '&#x' [0-9a-fA-F]+ ';'
3328 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003329 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003330 * the handling is done accordingly to
3331 * http://www.w3.org/TR/REC-xml#entproc
3332 */
3333void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003334xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 static int deprecated = 0;
3336 if (!deprecated) {
3337 xmlGenericError(xmlGenericErrorContext,
3338 "xmlParserHandleReference() deprecated function reached\n");
3339 deprecated = 1;
3340 }
3341
Owen Taylor3473f882001-02-23 17:55:21 +00003342 return;
3343}
3344
3345/**
3346 * xmlHandleEntity:
3347 * @ctxt: an XML parser context
3348 * @entity: an XML entity pointer.
3349 *
3350 * Default handling of defined entities, when should we define a new input
3351 * stream ? When do we just handle that as a set of chars ?
3352 *
3353 * OBSOLETE: to be removed at some point.
3354 */
3355
3356void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003357xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003358 static int deprecated = 0;
3359 if (!deprecated) {
3360 xmlGenericError(xmlGenericErrorContext,
3361 "xmlHandleEntity() deprecated function reached\n");
3362 deprecated = 1;
3363 }
3364
3365#if 0
3366 int len;
3367 xmlParserInputPtr input;
3368
3369 if (entity->content == NULL) {
3370 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3372 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3373 entity->name);
3374 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003375 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return;
3377 }
3378 len = xmlStrlen(entity->content);
3379 if (len <= 2) goto handle_as_char;
3380
3381 /*
3382 * Redefine its content as an input stream.
3383 */
3384 input = xmlNewEntityInputStream(ctxt, entity);
3385 xmlPushInput(ctxt, input);
3386 return;
3387
3388handle_as_char:
3389 /*
3390 * Just handle the content as a set of chars.
3391 */
3392 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3393 (ctxt->sax->characters != NULL))
3394 ctxt->sax->characters(ctxt->userData, entity->content, len);
3395#endif
3396}
3397
3398/**
3399 * xmlNewGlobalNs:
3400 * @doc: the document carrying the namespace
3401 * @href: the URI associated
3402 * @prefix: the prefix for the namespace
3403 *
3404 * Creation of a Namespace, the old way using PI and without scoping
3405 * DEPRECATED !!!
3406 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003407 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003408 */
3409xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003410xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3411 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003412 static int deprecated = 0;
3413 if (!deprecated) {
3414 xmlGenericError(xmlGenericErrorContext,
3415 "xmlNewGlobalNs() deprecated function reached\n");
3416 deprecated = 1;
3417 }
3418 return(NULL);
3419#if 0
3420 xmlNodePtr root;
3421
3422 xmlNsPtr cur;
3423
3424 root = xmlDocGetRootElement(doc);
3425 if (root != NULL)
3426 return(xmlNewNs(root, href, prefix));
3427
3428 /*
3429 * if there is no root element yet, create an old Namespace type
3430 * and it will be moved to the root at save time.
3431 */
3432 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3433 if (cur == NULL) {
3434 xmlGenericError(xmlGenericErrorContext,
3435 "xmlNewGlobalNs : malloc failed\n");
3436 return(NULL);
3437 }
3438 memset(cur, 0, sizeof(xmlNs));
3439 cur->type = XML_GLOBAL_NAMESPACE;
3440
3441 if (href != NULL)
3442 cur->href = xmlStrdup(href);
3443 if (prefix != NULL)
3444 cur->prefix = xmlStrdup(prefix);
3445
3446 /*
3447 * Add it at the end to preserve parsing order ...
3448 */
3449 if (doc != NULL) {
3450 if (doc->oldNs == NULL) {
3451 doc->oldNs = cur;
3452 } else {
3453 xmlNsPtr prev = doc->oldNs;
3454
3455 while (prev->next != NULL) prev = prev->next;
3456 prev->next = cur;
3457 }
3458 }
3459
3460 return(NULL);
3461#endif
3462}
3463
3464/**
3465 * xmlUpgradeOldNs:
3466 * @doc: a document pointer
3467 *
3468 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3469 * DEPRECATED
3470 */
3471void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003472xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003473 static int deprecated = 0;
3474 if (!deprecated) {
3475 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003476 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003477 deprecated = 1;
3478 }
3479#if 0
3480 xmlNsPtr cur;
3481
3482 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3483 if (doc->children == NULL) {
3484#ifdef DEBUG_TREE
3485 xmlGenericError(xmlGenericErrorContext,
3486 "xmlUpgradeOldNs: failed no root !\n");
3487#endif
3488 return;
3489 }
3490
3491 cur = doc->oldNs;
3492 while (cur->next != NULL) {
3493 cur->type = XML_LOCAL_NAMESPACE;
3494 cur = cur->next;
3495 }
3496 cur->type = XML_LOCAL_NAMESPACE;
3497 cur->next = doc->children->nsDef;
3498 doc->children->nsDef = doc->oldNs;
3499 doc->oldNs = NULL;
3500#endif
3501}
3502