blob: 65551441eed6f5153ec99d54f0eec0ee83806a1e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
Daniel Veillarda53c6882001-07-25 17:18:57 +000058/*
59 * Various global defaults for parsing
60 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Names of the parser features, in the order in which they are handed
 * out by xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names actually stored
 * @result:  array receiving pointers to the feature names
 *
 * Store at most *@len feature names into @result.  The stored pointers
 * refer to static strings and must not be freed or modified.
 *
 * If @len or @result is NULL nothing is copied and the total number of
 * features is returned, which lets a caller size its array first.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the
 * total number of known features; *@len is lowered to that total when
 * it was larger.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int count, idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    count = *len;
    if ((count < 0) || (count >= 1000))
        return(-1);

    /* Never hand out more names than the table holds. */
    if (count > total) {
        count = total;
        *len = total;
    }
    for (idx = 0; idx < count; idx++)
        result[idx] = xmlFeaturesList[idx];
    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
373/************************************************************************
374 * *
375 * Some functions to avoid too large macros *
376 * *
377 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.  Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space only TAB, LF and CR are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* Surrogate block. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through to the supplementary-plane test. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
399
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
414
/*
 * Lookup table for the BaseChar production restricted to code points
 * 0x00 - 0xFF: entry [c] is 1 when c is a BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray.  The rest of
 * the list is split into three tiers (below 0x0905, below 0x10A0, and
 * above) so that only the ranges which can contain @c are tested.
 * Negative values are rejected up front: they are not code points and
 * must never be used as an index into the lookup table.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
653
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The ASCII digits are the only members below 0x0660, so that tier is
 * answered with a single range test.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    if (c < 0x0660)
        return((c >= 0x0030) && (c <= 0x0039));

    return(
        ((c >= 0x0660) && (c <= 0x0669)) ||
        ((c >= 0x06F0) && (c <= 0x06F9)) ||
        ((c >= 0x0966) && (c <= 0x096F)) ||
        ((c >= 0x09E6) && (c <= 0x09EF)) ||
        ((c >= 0x0A66) && (c <= 0x0A6F)) ||
        ((c >= 0x0AE6) && (c <= 0x0AEF)) ||
        ((c >= 0x0B66) && (c <= 0x0B6F)) ||
        ((c >= 0x0BE7) && (c <= 0x0BEF)) ||
        ((c >= 0x0C66) && (c <= 0x0C6F)) ||
        ((c >= 0x0CE6) && (c <= 0x0CEF)) ||
        ((c >= 0x0D66) && (c <= 0x0D6F)) ||
        ((c >= 0x0E50) && (c <= 0x0E59)) ||
        ((c >= 0x0ED0) && (c <= 0x0ED9)) ||
        ((c >= 0x0F20) && (c <= 0x0F29)));
}
683
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The list is split into tiers at 0x0300, 0x0901, 0x0A02 and 0x0E31 so
 * that only the ranges which can contain @c are tested.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    if (c < 0x0300)
        return(0);

    if (c < 0x0901)
        return(
            ((c >= 0x0300) && (c <= 0x0345)) ||
            ((c >= 0x0360) && (c <= 0x0361)) ||
            ((c >= 0x0483) && (c <= 0x0486)) ||
            ((c >= 0x0591) && (c <= 0x05A1)) ||
            ((c >= 0x05A3) && (c <= 0x05B9)) ||
            ((c >= 0x05BB) && (c <= 0x05BD)) ||
            (c == 0x05BF) ||
            ((c >= 0x05C1) && (c <= 0x05C2)) ||
            (c == 0x05C4) ||
            ((c >= 0x064B) && (c <= 0x0652)) ||
            (c == 0x0670) ||
            ((c >= 0x06D6) && (c <= 0x06DC)) ||
            ((c >= 0x06DD) && (c <= 0x06DF)) ||
            ((c >= 0x06E0) && (c <= 0x06E4)) ||
            ((c >= 0x06E7) && (c <= 0x06E8)) ||
            ((c >= 0x06EA) && (c <= 0x06ED)));

    if (c < 0x0A02)
        return(
            ((c >= 0x0901) && (c <= 0x0903)) ||
            (c == 0x093C) ||
            ((c >= 0x093E) && (c <= 0x094C)) ||
            (c == 0x094D) ||
            ((c >= 0x0951) && (c <= 0x0954)) ||
            ((c >= 0x0962) && (c <= 0x0963)) ||
            ((c >= 0x0981) && (c <= 0x0983)) ||
            (c == 0x09BC) ||
            (c == 0x09BE) ||
            (c == 0x09BF) ||
            ((c >= 0x09C0) && (c <= 0x09C4)) ||
            ((c >= 0x09C7) && (c <= 0x09C8)) ||
            ((c >= 0x09CB) && (c <= 0x09CD)) ||
            (c == 0x09D7) ||
            ((c >= 0x09E2) && (c <= 0x09E3)));

    if (c < 0x0E31)
        return(
            (c == 0x0A02) ||
            (c == 0x0A3C) ||
            (c == 0x0A3E) ||
            (c == 0x0A3F) ||
            ((c >= 0x0A40) && (c <= 0x0A42)) ||
            ((c >= 0x0A47) && (c <= 0x0A48)) ||
            ((c >= 0x0A4B) && (c <= 0x0A4D)) ||
            ((c >= 0x0A70) && (c <= 0x0A71)) ||
            ((c >= 0x0A81) && (c <= 0x0A83)) ||
            (c == 0x0ABC) ||
            ((c >= 0x0ABE) && (c <= 0x0AC5)) ||
            ((c >= 0x0AC7) && (c <= 0x0AC9)) ||
            ((c >= 0x0ACB) && (c <= 0x0ACD)) ||
            ((c >= 0x0B01) && (c <= 0x0B03)) ||
            (c == 0x0B3C) ||
            ((c >= 0x0B3E) && (c <= 0x0B43)) ||
            ((c >= 0x0B47) && (c <= 0x0B48)) ||
            ((c >= 0x0B4B) && (c <= 0x0B4D)) ||
            ((c >= 0x0B56) && (c <= 0x0B57)) ||
            ((c >= 0x0B82) && (c <= 0x0B83)) ||
            ((c >= 0x0BBE) && (c <= 0x0BC2)) ||
            ((c >= 0x0BC6) && (c <= 0x0BC8)) ||
            ((c >= 0x0BCA) && (c <= 0x0BCD)) ||
            (c == 0x0BD7) ||
            ((c >= 0x0C01) && (c <= 0x0C03)) ||
            ((c >= 0x0C3E) && (c <= 0x0C44)) ||
            ((c >= 0x0C46) && (c <= 0x0C48)) ||
            ((c >= 0x0C4A) && (c <= 0x0C4D)) ||
            ((c >= 0x0C55) && (c <= 0x0C56)) ||
            ((c >= 0x0C82) && (c <= 0x0C83)) ||
            ((c >= 0x0CBE) && (c <= 0x0CC4)) ||
            ((c >= 0x0CC6) && (c <= 0x0CC8)) ||
            ((c >= 0x0CCA) && (c <= 0x0CCD)) ||
            ((c >= 0x0CD5) && (c <= 0x0CD6)) ||
            ((c >= 0x0D02) && (c <= 0x0D03)) ||
            ((c >= 0x0D3E) && (c <= 0x0D43)) ||
            ((c >= 0x0D46) && (c <= 0x0D48)) ||
            ((c >= 0x0D4A) && (c <= 0x0D4D)) ||
            (c == 0x0D57));

    return(
        (c == 0x0E31) ||
        ((c >= 0x0E34) && (c <= 0x0E3A)) ||
        ((c >= 0x0E47) && (c <= 0x0E4E)) ||
        (c == 0x0EB1) ||
        ((c >= 0x0EB4) && (c <= 0x0EB9)) ||
        ((c >= 0x0EBB) && (c <= 0x0EBC)) ||
        ((c >= 0x0EC8) && (c <= 0x0ECD)) ||
        ((c >= 0x0F18) && (c <= 0x0F19)) ||
        (c == 0x0F35) ||
        (c == 0x0F37) ||
        (c == 0x0F39) ||
        (c == 0x0F3E) ||
        (c == 0x0F3F) ||
        ((c >= 0x0F71) && (c <= 0x0F84)) ||
        ((c >= 0x0F86) && (c <= 0x0F8B)) ||
        ((c >= 0x0F90) && (c <= 0x0F95)) ||
        (c == 0x0F97) ||
        ((c >= 0x0F99) && (c <= 0x0FAD)) ||
        ((c >= 0x0FB1) && (c <= 0x0FB7)) ||
        (c == 0x0FB9) ||
        ((c >= 0x20D0) && (c <= 0x20DC)) ||
        (c == 0x20E1) ||
        ((c >= 0x302A) && (c <= 0x302F)) ||
        (c == 0x3099) ||
        (c == 0x309A));
}
796
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Test whether @c belongs to the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* The three contiguous ranges of the production. */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x309D) && (c <= 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* The isolated code points. */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
        (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
        (c == 0x0EC6) || (c == 0x3005))
        return(1);

    return(0);
}
821
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed in the production are accepted; the
 * range #xF900-#xFA2D is not part of production [86] and is therefore
 * rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* accelerator: nothing below 0x0100 is ideographic */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
839
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Test whether @c belongs to the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* A letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one. */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Test whether @c belongs to the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the three white space characters and the punctuation set */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
888/* we need to keep enough input to show errors in context */
889#define LINE_LEN 80
890
891#ifdef DEBUG_INPUT
892#define CHECK_BUFFER(in) check_buffer(in)
893
Daniel Veillard01c13b52002-12-10 15:19:08 +0000894static
Owen Taylor3473f882001-02-23 17:55:21 +0000895void check_buffer(xmlParserInputPtr in) {
896 if (in->base != in->buf->buffer->content) {
897 xmlGenericError(xmlGenericErrorContext,
898 "xmlParserInput: base mismatch problem\n");
899 }
900 if (in->cur < in->base) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: cur < base problem\n");
903 }
904 if (in->cur > in->base + in->buf->buffer->use) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur > base + use problem\n");
907 }
908 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
909 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
910 in->buf->buffer->use, in->buf->buffer->size);
911}
912
913#else
914#define CHECK_BUFFER(in)
915#endif
916
917
918/**
919 * xmlParserInputRead:
920 * @in: an XML parser input
921 * @len: an indicative size for the lookahead
922 *
923 * This function refresh the input for the parser. It doesn't try to
924 * preserve pointers to the input buffer, and discard already read data
925 *
926 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
927 * end of this entity
928 */
929int
930xmlParserInputRead(xmlParserInputPtr in, int len) {
931 int ret;
932 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000933 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000934
935#ifdef DEBUG_INPUT
936 xmlGenericError(xmlGenericErrorContext, "Read\n");
937#endif
938 if (in->buf == NULL) return(-1);
939 if (in->base == NULL) return(-1);
940 if (in->cur == NULL) return(-1);
941 if (in->buf->buffer == NULL) return(-1);
942 if (in->buf->readcallback == NULL) return(-1);
943
944 CHECK_BUFFER(in);
945
946 used = in->cur - in->buf->buffer->content;
947 ret = xmlBufferShrink(in->buf->buffer, used);
948 if (ret > 0) {
949 in->cur -= ret;
950 in->consumed += ret;
951 }
952 ret = xmlParserInputBufferRead(in->buf, len);
953 if (in->base != in->buf->buffer->content) {
954 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000955 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000956 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000957 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000958 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000959 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000960 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000961 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000962
963 CHECK_BUFFER(in);
964
965 return(ret);
966}
967
968/**
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
972 *
973 * This function increase the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keep already read data
975 *
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
977 * end of this entity
978 */
979int
980xmlParserInputGrow(xmlParserInputPtr in, int len) {
981 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000982 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000983
984#ifdef DEBUG_INPUT
985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
986#endif
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
991
992 CHECK_BUFFER(in);
993
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000994 indx = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000996
997 CHECK_BUFFER(in);
998
999 return(0);
1000 }
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1003 else
1004 return(0);
1005
1006 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001007 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001008 * block, but we use it really as an integer to do some
1009 * pointer arithmetic. Insure will raise it as a bug but in
1010 * that specific case, that's not !
1011 */
1012 if (in->base != in->buf->buffer->content) {
1013 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001015 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001016 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001017 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001018 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001019 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001020 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001021
1022 CHECK_BUFFER(in);
1023
1024 return(ret);
1025}
1026
1027/**
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1030 *
1031 * This function removes used input for the parser.
1032 */
1033void
1034xmlParserInputShrink(xmlParserInputPtr in) {
1035 int used;
1036 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001037 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001038
1039#ifdef DEBUG_INPUT
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1041#endif
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1046
1047 CHECK_BUFFER(in);
1048
1049 used = in->cur - in->buf->buffer->content;
1050 /*
1051 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001052 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001053 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001054 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return;
1056 if (used > INPUT_CHUNK) {
1057 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1058 if (ret > 0) {
1059 in->cur -= ret;
1060 in->consumed += ret;
1061 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001062 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001063 }
1064
1065 CHECK_BUFFER(in);
1066
1067 if (in->buf->buffer->use > INPUT_CHUNK) {
1068 return;
1069 }
1070 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1071 if (in->base != in->buf->buffer->content) {
1072 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001073 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001074 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001075 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001076 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001077 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001078 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001079 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001080
1081 CHECK_BUFFER(in);
1082}
1083
1084/************************************************************************
1085 * *
1086 * UTF8 character input and related functions *
1087 * *
1088 ************************************************************************/
1089
1090/**
1091 * xmlNextChar:
1092 * @ctxt: the XML parser context
1093 *
1094 * Skip to the next char input char.
1095 */
1096
1097void
1098xmlNextChar(xmlParserCtxtPtr ctxt) {
1099 if (ctxt->instate == XML_PARSER_EOF)
1100 return;
1101
1102 /*
1103 * 2.11 End-of-Line Handling
1104 * the literal two-character sequence "#xD#xA" or a standalone
1105 * literal #xD, an XML processor must pass to the application
1106 * the single character #xA.
1107 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001108 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001109 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001110 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1111 (ctxt->instate != XML_PARSER_COMMENT)) {
1112 /*
1113 * If we are at the end of the current entity and
1114 * the context allows it, we pop consumed entities
1115 * automatically.
1116 * the auto closing should be blocked in other cases
1117 */
1118 xmlPopInput(ctxt);
1119 } else {
1120 if (*(ctxt->input->cur) == '\n') {
1121 ctxt->input->line++; ctxt->input->col = 1;
1122 } else ctxt->input->col++;
1123 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1124 /*
1125 * We are supposed to handle UTF8, check it's valid
1126 * From rfc2044: encoding of the Unicode values on UTF-8:
1127 *
1128 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1129 * 0000 0000-0000 007F 0xxxxxxx
1130 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1131 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1132 *
1133 * Check for the 0x110000 limit too
1134 */
1135 const unsigned char *cur = ctxt->input->cur;
1136 unsigned char c;
1137
1138 c = *cur;
1139 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001140 if (cur[1] == 0)
1141 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1142 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001143 goto encoding_error;
1144 if ((c & 0xe0) == 0xe0) {
1145 unsigned int val;
1146
Daniel Veillard561b7f82002-03-20 21:55:57 +00001147 if (cur[2] == 0)
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001150 goto encoding_error;
1151 if ((c & 0xf0) == 0xf0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001152 if (cur[3] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001155 ((cur[3] & 0xc0) != 0x80))
1156 goto encoding_error;
1157 /* 4-byte code */
1158 ctxt->input->cur += 4;
1159 val = (cur[0] & 0x7) << 18;
1160 val |= (cur[1] & 0x3f) << 12;
1161 val |= (cur[2] & 0x3f) << 6;
1162 val |= cur[3] & 0x3f;
1163 } else {
1164 /* 3-byte code */
1165 ctxt->input->cur += 3;
1166 val = (cur[0] & 0xf) << 12;
1167 val |= (cur[1] & 0x3f) << 6;
1168 val |= cur[2] & 0x3f;
1169 }
1170 if (((val > 0xd7ff) && (val < 0xe000)) ||
1171 ((val > 0xfffd) && (val < 0x10000)) ||
1172 (val >= 0x110000)) {
1173 if ((ctxt->sax != NULL) &&
1174 (ctxt->sax->error != NULL))
1175 ctxt->sax->error(ctxt->userData,
1176 "Char 0x%X out of allowed range\n", val);
1177 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1178 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001179 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001180 }
1181 } else
1182 /* 2-byte code */
1183 ctxt->input->cur += 2;
1184 } else
1185 /* 1-byte code */
1186 ctxt->input->cur++;
1187 } else {
1188 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001189 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001190 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001191 * XML constructs only use < 128 chars
1192 */
1193 ctxt->input->cur++;
1194 }
1195 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001196 if (*ctxt->input->cur == 0)
1197 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001198 }
1199 } else {
1200 ctxt->input->cur++;
1201 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001202 if (*ctxt->input->cur == 0)
Owen Taylor3473f882001-02-23 17:55:21 +00001203 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1204 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001205 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Owen Taylor3473f882001-02-23 17:55:21 +00001206 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001207 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001208 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1209 xmlPopInput(ctxt);
1210 return;
1211encoding_error:
1212 /*
1213 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001214 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001215 * declaration header. Report the error and switch the encoding
1216 * to ISO-Latin-1 (if you don't like this policy, just declare the
1217 * encoding !)
1218 */
1219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1220 ctxt->sax->error(ctxt->userData,
1221 "Input is not proper UTF-8, indicate encoding !\n");
1222 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001223 ctxt->input->cur[0], ctxt->input->cur[1],
1224 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001225 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001226 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001227 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1228
1229 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001230 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001231 return;
1232}
1233
1234/**
1235 * xmlCurrentChar:
1236 * @ctxt: the XML parser context
1237 * @len: pointer to the length of the char read
1238 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001239 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001240 * bytes in the input buffer. Implement the end of line normalization:
1241 * 2.11 End-of-Line Handling
1242 * Wherever an external parsed entity or the literal entity value
1243 * of an internal parsed entity contains either the literal two-character
1244 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1245 * must pass to the application the single character #xA.
1246 * This behavior can conveniently be produced by normalizing all
1247 * line breaks to #xA on input, before parsing.)
1248 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001249 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001250 */
1251
1252int
1253xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1254 if (ctxt->instate == XML_PARSER_EOF)
1255 return(0);
1256
Daniel Veillard561b7f82002-03-20 21:55:57 +00001257 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1258 *len = 1;
1259 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001260 }
1261 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1262 /*
1263 * We are supposed to handle UTF8, check it's valid
1264 * From rfc2044: encoding of the Unicode values on UTF-8:
1265 *
1266 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1267 * 0000 0000-0000 007F 0xxxxxxx
1268 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1269 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1270 *
1271 * Check for the 0x110000 limit too
1272 */
1273 const unsigned char *cur = ctxt->input->cur;
1274 unsigned char c;
1275 unsigned int val;
1276
1277 c = *cur;
1278 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001279 if (cur[1] == 0)
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001282 goto encoding_error;
1283 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001284
1285 if (cur[2] == 0)
1286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1287 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001288 goto encoding_error;
1289 if ((c & 0xf0) == 0xf0) {
1290 if (cur[3] == 0)
1291 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001292 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001293 ((cur[3] & 0xc0) != 0x80))
1294 goto encoding_error;
1295 /* 4-byte code */
1296 *len = 4;
1297 val = (cur[0] & 0x7) << 18;
1298 val |= (cur[1] & 0x3f) << 12;
1299 val |= (cur[2] & 0x3f) << 6;
1300 val |= cur[3] & 0x3f;
1301 } else {
1302 /* 3-byte code */
1303 *len = 3;
1304 val = (cur[0] & 0xf) << 12;
1305 val |= (cur[1] & 0x3f) << 6;
1306 val |= cur[2] & 0x3f;
1307 }
1308 } else {
1309 /* 2-byte code */
1310 *len = 2;
1311 val = (cur[0] & 0x1f) << 6;
1312 val |= cur[1] & 0x3f;
1313 }
1314 if (!IS_CHAR(val)) {
1315 if ((ctxt->sax != NULL) &&
1316 (ctxt->sax->error != NULL))
1317 ctxt->sax->error(ctxt->userData,
1318 "Char 0x%X out of allowed range\n", val);
1319 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1320 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001321 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001322 }
1323 return(val);
1324 } else {
1325 /* 1-byte code */
1326 *len = 1;
1327 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001328 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001329 ctxt->nbChars++;
1330 ctxt->input->cur++;
1331 }
1332 return(0xA);
1333 }
1334 return((int) *ctxt->input->cur);
1335 }
1336 }
1337 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001338 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001339 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001340 * XML constructs only use < 128 chars
1341 */
1342 *len = 1;
1343 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001344 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001345 ctxt->nbChars++;
1346 ctxt->input->cur++;
1347 }
1348 return(0xA);
1349 }
1350 return((int) *ctxt->input->cur);
1351encoding_error:
1352 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001353 * An encoding problem may arise from a truncated input buffer
1354 * splitting a character in the middle. In that case do not raise
1355 * an error but return 0 to endicate an end of stream problem
1356 */
1357 if (ctxt->input->end - ctxt->input->cur < 4) {
1358 *len = 0;
1359 return(0);
1360 }
1361
1362 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001363 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001364 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001365 * declaration header. Report the error and switch the encoding
1366 * to ISO-Latin-1 (if you don't like this policy, just declare the
1367 * encoding !)
1368 */
1369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1370 ctxt->sax->error(ctxt->userData,
1371 "Input is not proper UTF-8, indicate encoding !\n");
1372 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001373 ctxt->input->cur[0], ctxt->input->cur[1],
1374 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001375 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001376 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001377 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1378
1379 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1380 *len = 1;
1381 return((int) *ctxt->input->cur);
1382}
1383
1384/**
1385 * xmlStringCurrentChar:
1386 * @ctxt: the XML parser context
1387 * @cur: pointer to the beginning of the char
1388 * @len: pointer to the length of the char read
1389 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001390 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001391 * bytes in the input buffer.
1392 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001393 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001394 */
1395
1396int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001397xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1398{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001399 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001400 /*
1401 * We are supposed to handle UTF8, check it's valid
1402 * From rfc2044: encoding of the Unicode values on UTF-8:
1403 *
1404 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1405 * 0000 0000-0000 007F 0xxxxxxx
1406 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1407 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1408 *
1409 * Check for the 0x110000 limit too
1410 */
1411 unsigned char c;
1412 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001413
Daniel Veillardd8224e02002-01-13 15:43:22 +00001414 c = *cur;
1415 if (c & 0x80) {
1416 if ((cur[1] & 0xc0) != 0x80)
1417 goto encoding_error;
1418 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001419
Daniel Veillardd8224e02002-01-13 15:43:22 +00001420 if ((cur[2] & 0xc0) != 0x80)
1421 goto encoding_error;
1422 if ((c & 0xf0) == 0xf0) {
1423 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1424 goto encoding_error;
1425 /* 4-byte code */
1426 *len = 4;
1427 val = (cur[0] & 0x7) << 18;
1428 val |= (cur[1] & 0x3f) << 12;
1429 val |= (cur[2] & 0x3f) << 6;
1430 val |= cur[3] & 0x3f;
1431 } else {
1432 /* 3-byte code */
1433 *len = 3;
1434 val = (cur[0] & 0xf) << 12;
1435 val |= (cur[1] & 0x3f) << 6;
1436 val |= cur[2] & 0x3f;
1437 }
1438 } else {
1439 /* 2-byte code */
1440 *len = 2;
1441 val = (cur[0] & 0x1f) << 6;
1442 val |= cur[1] & 0x3f;
1443 }
1444 if (!IS_CHAR(val)) {
1445 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1446 (ctxt->sax->error != NULL))
1447 ctxt->sax->error(ctxt->userData,
1448 "Char 0x%X out of allowed range\n",
1449 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001450 if (ctxt != NULL) {
1451 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1452 ctxt->wellFormed = 0;
1453 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1454 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001455 }
1456 return (val);
1457 } else {
1458 /* 1-byte code */
1459 *len = 1;
1460 return ((int) *cur);
1461 }
Owen Taylor3473f882001-02-23 17:55:21 +00001462 }
1463 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001464 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001465 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001466 * XML constructs only use < 128 chars
1467 */
1468 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001469 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001470encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001471
Owen Taylor3473f882001-02-23 17:55:21 +00001472 /*
1473 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001474 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001475 * declaration header. Report the error and switch the encoding
1476 * to ISO-Latin-1 (if you don't like this policy, just declare the
1477 * encoding !)
1478 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001479 if (ctxt != NULL) {
1480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1481 ctxt->sax->error(ctxt->userData,
1482 "Input is not proper UTF-8, indicate encoding !\n");
1483 ctxt->sax->error(ctxt->userData,
1484 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1485 ctxt->input->cur[0], ctxt->input->cur[1],
1486 ctxt->input->cur[2], ctxt->input->cur[3]);
1487 }
1488 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001489 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001490 }
Owen Taylor3473f882001-02-23 17:55:21 +00001491
1492 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001493 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001494}
1495
1496/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001497 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001498 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001499 * @val: the char value
1500 *
1501 * append the char value in the array
1502 *
1503 * Returns the number of xmlChar written
1504 */
Owen Taylor3473f882001-02-23 17:55:21 +00001505int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001506xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001507 /*
1508 * We are supposed to handle UTF8, check it's valid
1509 * From rfc2044: encoding of the Unicode values on UTF-8:
1510 *
1511 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1512 * 0000 0000-0000 007F 0xxxxxxx
1513 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1514 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1515 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001516 if (val >= 0x80) {
1517 xmlChar *savedout = out;
1518 int bits;
1519 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1520 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1521 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1522 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001523 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001524 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001525 val);
1526 return(0);
1527 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 for ( ; bits >= 0; bits-= 6)
1529 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1530 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 }
1532 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001533 return 1;
1534}
1535
1536/**
1537 * xmlCopyChar:
1538 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001539 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001540 * @val: the char value
1541 *
1542 * append the char value in the array
1543 *
1544 * Returns the number of xmlChar written
1545 */
1546
1547int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001548xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001549 /* the len parameter is ignored */
1550 if (val >= 0x80) {
1551 return(xmlCopyCharMultiByte (out, val));
1552 }
1553 *out = (xmlChar) val;
1554 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001555}
1556
1557/************************************************************************
1558 * *
1559 * Commodity functions to switch encodings *
1560 * *
1561 ************************************************************************/
1562
1563/**
1564 * xmlSwitchEncoding:
1565 * @ctxt: the parser context
1566 * @enc: the encoding value (number)
1567 *
1568 * change the input functions when discovering the character encoding
1569 * of a given entity.
1570 *
1571 * Returns 0 in case of success, -1 otherwise
1572 */
1573int
1574xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1575{
1576 xmlCharEncodingHandlerPtr handler;
1577
1578 switch (enc) {
1579 case XML_CHAR_ENCODING_ERROR:
1580 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1582 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1583 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001584 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001585 break;
1586 case XML_CHAR_ENCODING_NONE:
1587 /* let's assume it's UTF-8 without the XML decl */
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1589 return(0);
1590 case XML_CHAR_ENCODING_UTF8:
1591 /* default encoding, no conversion should be needed */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001593
1594 /*
1595 * Errata on XML-1.0 June 20 2001
1596 * Specific handling of the Byte Order Mark for
1597 * UTF-8
1598 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001599 if ((ctxt->input != NULL) &&
1600 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001601 (ctxt->input->cur[1] == 0xBB) &&
1602 (ctxt->input->cur[2] == 0xBF)) {
1603 ctxt->input->cur += 3;
1604 }
Owen Taylor3473f882001-02-23 17:55:21 +00001605 return(0);
1606 default:
1607 break;
1608 }
1609 handler = xmlGetCharEncodingHandler(enc);
1610 if (handler == NULL) {
1611 /*
1612 * Default handlers.
1613 */
1614 switch (enc) {
1615 case XML_CHAR_ENCODING_ERROR:
1616 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1618 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1619 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001620 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1622 break;
1623 case XML_CHAR_ENCODING_NONE:
1624 /* let's assume it's UTF-8 without the XML decl */
1625 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1626 return(0);
1627 case XML_CHAR_ENCODING_UTF8:
1628 case XML_CHAR_ENCODING_ASCII:
1629 /* default encoding, no conversion should be needed */
1630 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1631 return(0);
1632 case XML_CHAR_ENCODING_UTF16LE:
1633 break;
1634 case XML_CHAR_ENCODING_UTF16BE:
1635 break;
1636 case XML_CHAR_ENCODING_UCS4LE:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding USC4 little endian not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_UCS4BE:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding USC4 big endian not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_EBCDIC:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding EBCDIC not supported\n");
1653 break;
1654 case XML_CHAR_ENCODING_UCS4_2143:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding UCS4 2143 not supported\n");
1659 break;
1660 case XML_CHAR_ENCODING_UCS4_3412:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding UCS4 3412 not supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_UCS2:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding UCS2 not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_8859_1:
1673 case XML_CHAR_ENCODING_8859_2:
1674 case XML_CHAR_ENCODING_8859_3:
1675 case XML_CHAR_ENCODING_8859_4:
1676 case XML_CHAR_ENCODING_8859_5:
1677 case XML_CHAR_ENCODING_8859_6:
1678 case XML_CHAR_ENCODING_8859_7:
1679 case XML_CHAR_ENCODING_8859_8:
1680 case XML_CHAR_ENCODING_8859_9:
1681 /*
1682 * We used to keep the internal content in the
1683 * document encoding however this turns being unmaintainable
1684 * So xmlGetCharEncodingHandler() will return non-null
1685 * values for this now.
1686 */
1687 if ((ctxt->inputNr == 1) &&
1688 (ctxt->encoding == NULL) &&
1689 (ctxt->input->encoding != NULL)) {
1690 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1691 }
1692 ctxt->charset = enc;
1693 return(0);
1694 case XML_CHAR_ENCODING_2022_JP:
1695 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697 ctxt->sax->error(ctxt->userData,
1698 "char encoding ISO-2022-JPnot supported\n");
1699 break;
1700 case XML_CHAR_ENCODING_SHIFT_JIS:
1701 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1703 ctxt->sax->error(ctxt->userData,
1704 "char encoding Shift_JIS not supported\n");
1705 break;
1706 case XML_CHAR_ENCODING_EUC_JP:
1707 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
1710 "char encoding EUC-JPnot supported\n");
1711 break;
1712 }
1713 }
1714 if (handler == NULL)
1715 return(-1);
1716 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1717 return(xmlSwitchToEncoding(ctxt, handler));
1718}
1719
1720/**
1721 * xmlSwitchToEncoding:
1722 * @ctxt: the parser context
1723 * @handler: the encoding handler
1724 *
1725 * change the input functions when discovering the character encoding
1726 * of a given entity.
1727 *
1728 * Returns 0 in case of success, -1 otherwise
1729 */
1730int
1731xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1732{
1733 int nbchars;
1734
1735 if (handler != NULL) {
1736 if (ctxt->input != NULL) {
1737 if (ctxt->input->buf != NULL) {
1738 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001739 /*
1740 * Check in case the auto encoding detetection triggered
1741 * in already.
1742 */
Owen Taylor3473f882001-02-23 17:55:21 +00001743 if (ctxt->input->buf->encoder == handler)
1744 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001745
1746 /*
1747 * "UTF-16" can be used for both LE and BE
1748 */
1749 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1750 BAD_CAST "UTF-16", 6)) &&
1751 (!xmlStrncmp(BAD_CAST handler->name,
1752 BAD_CAST "UTF-16", 6))) {
1753 return(0);
1754 }
1755
Owen Taylor3473f882001-02-23 17:55:21 +00001756 /*
1757 * Note: this is a bit dangerous, but that's what it
1758 * takes to use nearly compatible signature for different
1759 * encodings.
1760 */
1761 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1762 ctxt->input->buf->encoder = handler;
1763 return(0);
1764 }
1765 ctxt->input->buf->encoder = handler;
1766
1767 /*
1768 * Is there already some content down the pipe to convert ?
1769 */
1770 if ((ctxt->input->buf->buffer != NULL) &&
1771 (ctxt->input->buf->buffer->use > 0)) {
1772 int processed;
1773
1774 /*
1775 * Specific handling of the Byte Order Mark for
1776 * UTF-16
1777 */
1778 if ((handler->name != NULL) &&
1779 (!strcmp(handler->name, "UTF-16LE")) &&
1780 (ctxt->input->cur[0] == 0xFF) &&
1781 (ctxt->input->cur[1] == 0xFE)) {
1782 ctxt->input->cur += 2;
1783 }
1784 if ((handler->name != NULL) &&
1785 (!strcmp(handler->name, "UTF-16BE")) &&
1786 (ctxt->input->cur[0] == 0xFE) &&
1787 (ctxt->input->cur[1] == 0xFF)) {
1788 ctxt->input->cur += 2;
1789 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001790 /*
1791 * Errata on XML-1.0 June 20 2001
1792 * Specific handling of the Byte Order Mark for
1793 * UTF-8
1794 */
1795 if ((handler->name != NULL) &&
1796 (!strcmp(handler->name, "UTF-8")) &&
1797 (ctxt->input->cur[0] == 0xEF) &&
1798 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001799 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001800 ctxt->input->cur += 3;
1801 }
Owen Taylor3473f882001-02-23 17:55:21 +00001802
1803 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 * Move it as the raw buffer and create a new input buffer
1806 */
1807 processed = ctxt->input->cur - ctxt->input->base;
1808 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1809 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1810 ctxt->input->buf->buffer = xmlBufferCreate();
1811
1812 if (ctxt->html) {
1813 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001814 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001815 */
1816 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1817 ctxt->input->buf->buffer,
1818 ctxt->input->buf->raw);
1819 } else {
1820 /*
1821 * convert just enough to get
1822 * '<?xml version="1.0" encoding="xxx"?>'
1823 * parsed with the autodetected encoding
1824 * into the parser reading buffer.
1825 */
1826 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1827 ctxt->input->buf->buffer,
1828 ctxt->input->buf->raw);
1829 }
1830 if (nbchars < 0) {
1831 xmlGenericError(xmlGenericErrorContext,
1832 "xmlSwitchToEncoding: encoder error\n");
1833 return(-1);
1834 }
1835 ctxt->input->base =
1836 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001837 ctxt->input->end =
1838 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001839
1840 }
1841 return(0);
1842 } else {
1843 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1844 /*
1845 * When parsing a static memory array one must know the
1846 * size to be able to convert the buffer.
1847 */
1848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1849 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001850 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001851 return(-1);
1852 } else {
1853 int processed;
1854
1855 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001856 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001857 * Move it as the raw buffer and create a new input buffer
1858 */
1859 processed = ctxt->input->cur - ctxt->input->base;
1860
1861 ctxt->input->buf->raw = xmlBufferCreate();
1862 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1863 ctxt->input->length - processed);
1864 ctxt->input->buf->buffer = xmlBufferCreate();
1865
1866 /*
1867 * convert as much as possible of the raw input
1868 * to the parser reading buffer.
1869 */
1870 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1871 ctxt->input->buf->buffer,
1872 ctxt->input->buf->raw);
1873 if (nbchars < 0) {
1874 xmlGenericError(xmlGenericErrorContext,
1875 "xmlSwitchToEncoding: encoder error\n");
1876 return(-1);
1877 }
1878
1879 /*
1880 * Conversion succeeded, get rid of the old buffer
1881 */
1882 if ((ctxt->input->free != NULL) &&
1883 (ctxt->input->base != NULL))
1884 ctxt->input->free((xmlChar *) ctxt->input->base);
1885 ctxt->input->base =
1886 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001887 ctxt->input->end =
1888 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 }
1891 } else {
1892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1893 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001894 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001895 return(-1);
1896 }
1897 /*
1898 * The parsing is now done in UTF8 natively
1899 */
1900 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1901 } else
1902 return(-1);
1903 return(0);
1904
1905}
1906
1907/************************************************************************
1908 * *
1909 * Commodity functions to handle entities processing *
1910 * *
1911 ************************************************************************/
1912
1913/**
1914 * xmlFreeInputStream:
1915 * @input: an xmlParserInputPtr
1916 *
1917 * Free up an input stream.
1918 */
1919void
1920xmlFreeInputStream(xmlParserInputPtr input) {
1921 if (input == NULL) return;
1922
1923 if (input->filename != NULL) xmlFree((char *) input->filename);
1924 if (input->directory != NULL) xmlFree((char *) input->directory);
1925 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1926 if (input->version != NULL) xmlFree((char *) input->version);
1927 if ((input->free != NULL) && (input->base != NULL))
1928 input->free((xmlChar *) input->base);
1929 if (input->buf != NULL)
1930 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001931 xmlFree(input);
1932}
1933
1934/**
1935 * xmlNewInputStream:
1936 * @ctxt: an XML parser context
1937 *
1938 * Create a new input stream structure
1939 * Returns the new input stream or NULL
1940 */
1941xmlParserInputPtr
1942xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1943 xmlParserInputPtr input;
1944
1945 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1946 if (input == NULL) {
1947 if (ctxt != NULL) {
1948 ctxt->errNo = XML_ERR_NO_MEMORY;
1949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1950 ctxt->sax->error(ctxt->userData,
1951 "malloc: couldn't allocate a new input stream\n");
1952 ctxt->errNo = XML_ERR_NO_MEMORY;
1953 }
1954 return(NULL);
1955 }
1956 memset(input, 0, sizeof(xmlParserInput));
1957 input->line = 1;
1958 input->col = 1;
1959 input->standalone = -1;
1960 return(input);
1961}
1962
1963/**
1964 * xmlNewIOInputStream:
1965 * @ctxt: an XML parser context
1966 * @input: an I/O Input
1967 * @enc: the charset encoding if known
1968 *
1969 * Create a new input stream structure encapsulating the @input into
1970 * a stream suitable for the parser.
1971 *
1972 * Returns the new input stream or NULL
1973 */
1974xmlParserInputPtr
1975xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1976 xmlCharEncoding enc) {
1977 xmlParserInputPtr inputStream;
1978
1979 if (xmlParserDebugEntities)
1980 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1981 inputStream = xmlNewInputStream(ctxt);
1982 if (inputStream == NULL) {
1983 return(NULL);
1984 }
1985 inputStream->filename = NULL;
1986 inputStream->buf = input;
1987 inputStream->base = inputStream->buf->buffer->content;
1988 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001989 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001990 if (enc != XML_CHAR_ENCODING_NONE) {
1991 xmlSwitchEncoding(ctxt, enc);
1992 }
1993
1994 return(inputStream);
1995}
1996
1997/**
1998 * xmlNewEntityInputStream:
1999 * @ctxt: an XML parser context
2000 * @entity: an Entity pointer
2001 *
2002 * Create a new input stream based on an xmlEntityPtr
2003 *
2004 * Returns the new input stream or NULL
2005 */
2006xmlParserInputPtr
2007xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2008 xmlParserInputPtr input;
2009
2010 if (entity == NULL) {
2011 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
2014 "internal: xmlNewEntityInputStream entity = NULL\n");
2015 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2016 return(NULL);
2017 }
2018 if (xmlParserDebugEntities)
2019 xmlGenericError(xmlGenericErrorContext,
2020 "new input from entity: %s\n", entity->name);
2021 if (entity->content == NULL) {
2022 switch (entity->etype) {
2023 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2024 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2026 ctxt->sax->error(ctxt->userData,
2027 "xmlNewEntityInputStream unparsed entity !\n");
2028 break;
2029 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2030 case XML_EXTERNAL_PARAMETER_ENTITY:
2031 return(xmlLoadExternalEntity((char *) entity->URI,
2032 (char *) entity->ExternalID, ctxt));
2033 case XML_INTERNAL_GENERAL_ENTITY:
2034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2035 ctxt->sax->error(ctxt->userData,
2036 "Internal entity %s without content !\n", entity->name);
2037 break;
2038 case XML_INTERNAL_PARAMETER_ENTITY:
2039 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2041 ctxt->sax->error(ctxt->userData,
2042 "Internal parameter entity %s without content !\n", entity->name);
2043 break;
2044 case XML_INTERNAL_PREDEFINED_ENTITY:
2045 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2047 ctxt->sax->error(ctxt->userData,
2048 "Predefined entity %s without content !\n", entity->name);
2049 break;
2050 }
2051 return(NULL);
2052 }
2053 input = xmlNewInputStream(ctxt);
2054 if (input == NULL) {
2055 return(NULL);
2056 }
2057 input->filename = (char *) entity->URI;
2058 input->base = entity->content;
2059 input->cur = entity->content;
2060 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002061 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002062 return(input);
2063}
2064
2065/**
2066 * xmlNewStringInputStream:
2067 * @ctxt: an XML parser context
2068 * @buffer: an memory buffer
2069 *
2070 * Create a new input stream based on a memory buffer.
2071 * Returns the new input stream
2072 */
2073xmlParserInputPtr
2074xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2075 xmlParserInputPtr input;
2076
2077 if (buffer == NULL) {
2078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2080 ctxt->sax->error(ctxt->userData,
2081 "internal: xmlNewStringInputStream string = NULL\n");
2082 return(NULL);
2083 }
2084 if (xmlParserDebugEntities)
2085 xmlGenericError(xmlGenericErrorContext,
2086 "new fixed input: %.30s\n", buffer);
2087 input = xmlNewInputStream(ctxt);
2088 if (input == NULL) {
2089 return(NULL);
2090 }
2091 input->base = buffer;
2092 input->cur = buffer;
2093 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002094 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002095 return(input);
2096}
2097
2098/**
2099 * xmlNewInputFromFile:
2100 * @ctxt: an XML parser context
2101 * @filename: the filename to use as entity
2102 *
2103 * Create a new input stream based on a file.
2104 *
2105 * Returns the new input stream or NULL in case of error
2106 */
2107xmlParserInputPtr
2108xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2109 xmlParserInputBufferPtr buf;
2110 xmlParserInputPtr inputStream;
2111 char *directory = NULL;
2112 xmlChar *URI = NULL;
2113
2114 if (xmlParserDebugEntities)
2115 xmlGenericError(xmlGenericErrorContext,
2116 "new input from file: %s\n", filename);
2117 if (ctxt == NULL) return(NULL);
2118 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2119 if (buf == NULL)
2120 return(NULL);
2121
2122 URI = xmlStrdup((xmlChar *) filename);
2123 directory = xmlParserGetDirectory((const char *) URI);
2124
2125 inputStream = xmlNewInputStream(ctxt);
2126 if (inputStream == NULL) {
2127 if (directory != NULL) xmlFree((char *) directory);
2128 if (URI != NULL) xmlFree((char *) URI);
2129 return(NULL);
2130 }
2131
2132 inputStream->filename = (const char *) URI;
2133 inputStream->directory = directory;
2134 inputStream->buf = buf;
2135
2136 inputStream->base = inputStream->buf->buffer->content;
2137 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002138 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002139 if ((ctxt->directory == NULL) && (directory != NULL))
2140 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2141 return(inputStream);
2142}
2143
2144/************************************************************************
2145 * *
2146 * Commodity functions to handle parser contexts *
2147 * *
2148 ************************************************************************/
2149
2150/**
2151 * xmlInitParserCtxt:
2152 * @ctxt: an XML parser context
2153 *
2154 * Initialize a parser context
2155 */
2156
2157void
2158xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2159{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002160 if(ctxt==NULL) {
2161 xmlGenericError(xmlGenericErrorContext,
2162 "xmlInitParserCtxt: NULL context given\n");
2163 return;
2164 }
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 xmlDefaultSAXHandlerInit();
2167
William M. Brack8b2c7f12002-11-22 05:07:29 +00002168 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2169 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlGenericError(xmlGenericErrorContext,
2171 "xmlInitParserCtxt: out of memory\n");
2172 }
2173 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002174 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002175
2176 /* Allocate the Input stack */
2177 ctxt->inputTab = (xmlParserInputPtr *)
2178 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2179 if (ctxt->inputTab == NULL) {
2180 xmlGenericError(xmlGenericErrorContext,
2181 "xmlInitParserCtxt: out of memory\n");
2182 ctxt->inputNr = 0;
2183 ctxt->inputMax = 0;
2184 ctxt->input = NULL;
2185 return;
2186 }
2187 ctxt->inputNr = 0;
2188 ctxt->inputMax = 5;
2189 ctxt->input = NULL;
2190
2191 ctxt->version = NULL;
2192 ctxt->encoding = NULL;
2193 ctxt->standalone = -1;
2194 ctxt->hasExternalSubset = 0;
2195 ctxt->hasPErefs = 0;
2196 ctxt->html = 0;
2197 ctxt->external = 0;
2198 ctxt->instate = XML_PARSER_START;
2199 ctxt->token = 0;
2200 ctxt->directory = NULL;
2201
2202 /* Allocate the Node stack */
2203 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2204 if (ctxt->nodeTab == NULL) {
2205 xmlGenericError(xmlGenericErrorContext,
2206 "xmlInitParserCtxt: out of memory\n");
2207 ctxt->nodeNr = 0;
2208 ctxt->nodeMax = 0;
2209 ctxt->node = NULL;
2210 ctxt->inputNr = 0;
2211 ctxt->inputMax = 0;
2212 ctxt->input = NULL;
2213 return;
2214 }
2215 ctxt->nodeNr = 0;
2216 ctxt->nodeMax = 10;
2217 ctxt->node = NULL;
2218
2219 /* Allocate the Name stack */
2220 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2221 if (ctxt->nameTab == NULL) {
2222 xmlGenericError(xmlGenericErrorContext,
2223 "xmlInitParserCtxt: out of memory\n");
2224 ctxt->nodeNr = 0;
2225 ctxt->nodeMax = 0;
2226 ctxt->node = NULL;
2227 ctxt->inputNr = 0;
2228 ctxt->inputMax = 0;
2229 ctxt->input = NULL;
2230 ctxt->nameNr = 0;
2231 ctxt->nameMax = 0;
2232 ctxt->name = NULL;
2233 return;
2234 }
2235 ctxt->nameNr = 0;
2236 ctxt->nameMax = 10;
2237 ctxt->name = NULL;
2238
2239 /* Allocate the space stack */
2240 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2241 if (ctxt->spaceTab == NULL) {
2242 xmlGenericError(xmlGenericErrorContext,
2243 "xmlInitParserCtxt: out of memory\n");
2244 ctxt->nodeNr = 0;
2245 ctxt->nodeMax = 0;
2246 ctxt->node = NULL;
2247 ctxt->inputNr = 0;
2248 ctxt->inputMax = 0;
2249 ctxt->input = NULL;
2250 ctxt->nameNr = 0;
2251 ctxt->nameMax = 0;
2252 ctxt->name = NULL;
2253 ctxt->spaceNr = 0;
2254 ctxt->spaceMax = 0;
2255 ctxt->space = NULL;
2256 return;
2257 }
2258 ctxt->spaceNr = 1;
2259 ctxt->spaceMax = 10;
2260 ctxt->spaceTab[0] = -1;
2261 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002262 ctxt->userData = ctxt;
2263 ctxt->myDoc = NULL;
2264 ctxt->wellFormed = 1;
2265 ctxt->valid = 1;
2266 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2267 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2268 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002269 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002270 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002271 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002272 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002273
Owen Taylor3473f882001-02-23 17:55:21 +00002274 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002275 ctxt->vctxt.error = xmlParserValidityError;
2276 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002277 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002278 if (xmlGetWarningsDefaultValue == 0)
2279 ctxt->vctxt.warning = NULL;
2280 else
2281 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002282 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002283 }
2284 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2285 ctxt->record_info = 0;
2286 ctxt->nbChars = 0;
2287 ctxt->checkIndex = 0;
2288 ctxt->inSubset = 0;
2289 ctxt->errNo = XML_ERR_OK;
2290 ctxt->depth = 0;
2291 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002292 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002293 xmlInitNodeInfoSeq(&ctxt->node_seq);
2294}
2295
2296/**
2297 * xmlFreeParserCtxt:
2298 * @ctxt: an XML parser context
2299 *
2300 * Free all the memory used by a parser context. However the parsed
2301 * document in ctxt->myDoc is not freed.
2302 */
2303
2304void
2305xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2306{
2307 xmlParserInputPtr input;
2308 xmlChar *oldname;
2309
2310 if (ctxt == NULL) return;
2311
2312 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2313 xmlFreeInputStream(input);
2314 }
2315 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2316 xmlFree(oldname);
2317 }
2318 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2319 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2320 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2321 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2322 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2323 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2324 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2325 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2326 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002327 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2328 xmlFree(ctxt->sax);
2329 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002330 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002331#ifdef LIBXML_CATALOG_ENABLED
2332 if (ctxt->catalogs != NULL)
2333 xmlCatalogFreeLocal(ctxt->catalogs);
2334#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002335 xmlFree(ctxt);
2336}
2337
2338/**
2339 * xmlNewParserCtxt:
2340 *
2341 * Allocate and initialize a new parser context.
2342 *
2343 * Returns the xmlParserCtxtPtr or NULL
2344 */
2345
2346xmlParserCtxtPtr
2347xmlNewParserCtxt()
2348{
2349 xmlParserCtxtPtr ctxt;
2350
2351 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2352 if (ctxt == NULL) {
2353 xmlGenericError(xmlGenericErrorContext,
2354 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002355 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002356 return(NULL);
2357 }
2358 memset(ctxt, 0, sizeof(xmlParserCtxt));
2359 xmlInitParserCtxt(ctxt);
2360 return(ctxt);
2361}
2362
2363/************************************************************************
2364 * *
 *		Handling of node information				*
2366 * *
2367 ************************************************************************/
2368
2369/**
2370 * xmlClearParserCtxt:
2371 * @ctxt: an XML parser context
2372 *
2373 * Clear (release owned resources) and reinitialize a parser context
2374 */
2375
2376void
2377xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2378{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002379 if (ctxt==NULL)
2380 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002381 xmlClearNodeInfoSeq(&ctxt->node_seq);
2382 xmlInitParserCtxt(ctxt);
2383}
2384
2385/**
2386 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002387 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002388 * @node: an XML node within the tree
2389 *
2390 * Find the parser node info struct for a given node
2391 *
2392 * Returns an xmlParserNodeInfo block pointer or NULL
2393 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002394const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2395 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002396{
2397 unsigned long pos;
2398
2399 /* Find position where node should be at */
2400 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002401 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002402 return &ctx->node_seq.buffer[pos];
2403 else
2404 return NULL;
2405}
2406
2407
2408/**
2409 * xmlInitNodeInfoSeq:
2410 * @seq: a node info sequence pointer
2411 *
2412 * -- Initialize (set to initial state) node info sequence
2413 */
2414void
2415xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2416{
2417 seq->length = 0;
2418 seq->maximum = 0;
2419 seq->buffer = NULL;
2420}
2421
2422/**
2423 * xmlClearNodeInfoSeq:
2424 * @seq: a node info sequence pointer
2425 *
2426 * -- Clear (release memory and reinitialize) node
2427 * info sequence
2428 */
2429void
2430xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2431{
2432 if ( seq->buffer != NULL )
2433 xmlFree(seq->buffer);
2434 xmlInitNodeInfoSeq(seq);
2435}
2436
2437
2438/**
2439 * xmlParserFindNodeInfoIndex:
2440 * @seq: a node info sequence pointer
2441 * @node: an XML node pointer
2442 *
2443 *
2444 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2445 * the given node is or should be at in a sorted sequence
2446 *
2447 * Returns a long indicating the position of the record
2448 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002449unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2450 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002451{
2452 unsigned long upper, lower, middle;
2453 int found = 0;
2454
2455 /* Do a binary search for the key */
2456 lower = 1;
2457 upper = seq->length;
2458 middle = 0;
2459 while ( lower <= upper && !found) {
2460 middle = lower + (upper - lower) / 2;
2461 if ( node == seq->buffer[middle - 1].node )
2462 found = 1;
2463 else if ( node < seq->buffer[middle - 1].node )
2464 upper = middle - 1;
2465 else
2466 lower = middle + 1;
2467 }
2468
2469 /* Return position */
2470 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2471 return middle;
2472 else
2473 return middle - 1;
2474}
2475
2476
2477/**
2478 * xmlParserAddNodeInfo:
2479 * @ctxt: an XML parser context
2480 * @info: a node info sequence pointer
2481 *
2482 * Insert node info record into the sorted sequence
2483 */
2484void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002485xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002486 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002487{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002488 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002489
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002490 /* Find pos and check to see if node is already in the sequence */
2491 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2492 info->node);
2493 if (pos < ctxt->node_seq.length
2494 && ctxt->node_seq.buffer[pos].node == info->node) {
2495 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002496 }
2497
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002498 /* Otherwise, we need to add new node to buffer */
2499 else {
2500 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2501 xmlParserNodeInfo *tmp_buffer;
2502 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002503
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002504 if (ctxt->node_seq.maximum == 0)
2505 ctxt->node_seq.maximum = 2;
2506 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2507 (2 * ctxt->node_seq.maximum));
2508
2509 if (ctxt->node_seq.buffer == NULL)
2510 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2511 else
2512 tmp_buffer =
2513 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2514 byte_size);
2515
2516 if (tmp_buffer == NULL) {
2517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2518 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2519 ctxt->errNo = XML_ERR_NO_MEMORY;
2520 return;
2521 }
2522 ctxt->node_seq.buffer = tmp_buffer;
2523 ctxt->node_seq.maximum *= 2;
2524 }
2525
2526 /* If position is not at end, move elements out of the way */
2527 if (pos != ctxt->node_seq.length) {
2528 unsigned long i;
2529
2530 for (i = ctxt->node_seq.length; i > pos; i--)
2531 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2532 }
2533
2534 /* Copy element and increase length */
2535 ctxt->node_seq.buffer[pos] = *info;
2536 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002537 }
Owen Taylor3473f882001-02-23 17:55:21 +00002538}
2539
2540/************************************************************************
2541 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002542 * Defaults settings *
2543 * *
2544 ************************************************************************/
2545/**
2546 * xmlPedanticParserDefault:
2547 * @val: int 0 or 1
2548 *
2549 * Set and return the previous value for enabling pedantic warnings.
2550 *
2551 * Returns the last value for 0 for no substitution, 1 for substitution.
2552 */
2553
2554int
2555xmlPedanticParserDefault(int val) {
2556 int old = xmlPedanticParserDefaultValue;
2557
2558 xmlPedanticParserDefaultValue = val;
2559 return(old);
2560}
2561
2562/**
2563 * xmlLineNumbersDefault:
2564 * @val: int 0 or 1
2565 *
2566 * Set and return the previous value for enabling line numbers in elements
2567 * contents. This may break on old application and is turned off by default.
2568 *
2569 * Returns the last value for 0 for no substitution, 1 for substitution.
2570 */
2571
2572int
2573xmlLineNumbersDefault(int val) {
2574 int old = xmlLineNumbersDefaultValue;
2575
2576 xmlLineNumbersDefaultValue = val;
2577 return(old);
2578}
2579
2580/**
2581 * xmlSubstituteEntitiesDefault:
2582 * @val: int 0 or 1
2583 *
2584 * Set and return the previous value for default entity support.
2585 * Initially the parser always keep entity references instead of substituting
2586 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002587 * default parser behavior
2588 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002589 * file basis.
2590 *
2591 * Returns the last value for 0 for no substitution, 1 for substitution.
2592 */
2593
2594int
2595xmlSubstituteEntitiesDefault(int val) {
2596 int old = xmlSubstituteEntitiesDefaultValue;
2597
2598 xmlSubstituteEntitiesDefaultValue = val;
2599 return(old);
2600}
2601
2602/**
2603 * xmlKeepBlanksDefault:
2604 * @val: int 0 or 1
2605 *
2606 * Set and return the previous value for default blanks text nodes support.
2607 * The 1.x version of the parser used an heuristic to try to detect
2608 * ignorable white spaces. As a result the SAX callback was generating
2609 * ignorableWhitespace() callbacks instead of characters() one, and when
2610 * using the DOM output text nodes containing those blanks were not generated.
2611 * The 2.x and later version will switch to the XML standard way and
2612 * ignorableWhitespace() are only generated when running the parser in
2613 * validating mode and when the current element doesn't allow CDATA or
2614 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002615 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002616 * on 1.X libs and to switch back to the old mode for compatibility when
2617 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2618 * by using xmlIsBlankNode() commodity function to detect the "empty"
2619 * nodes generated.
2620 * This value also affect autogeneration of indentation when saving code
2621 * if blanks sections are kept, indentation is not generated.
2622 *
2623 * Returns the last value for 0 for no substitution, 1 for substitution.
2624 */
2625
2626int
2627xmlKeepBlanksDefault(int val) {
2628 int old = xmlKeepBlanksDefaultValue;
2629
2630 xmlKeepBlanksDefaultValue = val;
2631 xmlIndentTreeOutput = !val;
2632 return(old);
2633}
2634
/************************************************************************
 *                                                                      *
 *              Deprecated functions kept for compatibility             *
 *                                                                      *
 ************************************************************************/
2640
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002641/**
2642 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002643 * @lang: pointer to the string value
2644 *
2645 * Checks that the value conforms to the LanguageID production:
2646 *
2647 * NOTE: this is somewhat deprecated, those productions were removed from
2648 * the XML Second edition.
2649 *
2650 * [33] LanguageID ::= Langcode ('-' Subcode)*
2651 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2652 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2653 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2654 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2655 * [38] Subcode ::= ([a-z] | [A-Z])+
2656 *
2657 * Returns 1 if correct 0 otherwise
2658 **/
2659int
2660xmlCheckLanguageID(const xmlChar *lang) {
2661 const xmlChar *cur = lang;
2662
2663 if (cur == NULL)
2664 return(0);
2665 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2666 ((cur[0] == 'I') && (cur[1] == '-'))) {
2667 /*
2668 * IANA code
2669 */
2670 cur += 2;
2671 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2672 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2673 cur++;
2674 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2675 ((cur[0] == 'X') && (cur[1] == '-'))) {
2676 /*
2677 * User code
2678 */
2679 cur += 2;
2680 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2681 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2682 cur++;
2683 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2684 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2685 /*
2686 * ISO639
2687 */
2688 cur++;
2689 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2690 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2691 cur++;
2692 else
2693 return(0);
2694 } else
2695 return(0);
2696 while (cur[0] != 0) { /* non input consuming */
2697 if (cur[0] != '-')
2698 return(0);
2699 cur++;
2700 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2701 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2702 cur++;
2703 else
2704 return(0);
2705 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2706 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2707 cur++;
2708 }
2709 return(1);
2710}
2711
2712/**
2713 * xmlDecodeEntities:
2714 * @ctxt: the parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002715 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002716 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
Owen Taylor3473f882001-02-23 17:55:21 +00002717 * @end: an end marker xmlChar, 0 if none
2718 * @end2: an end marker xmlChar, 0 if none
2719 * @end3: an end marker xmlChar, 0 if none
2720 *
2721 * This function is deprecated, we now always process entities content
2722 * through xmlStringDecodeEntities
2723 *
2724 * TODO: remove it in next major release.
2725 *
2726 * [67] Reference ::= EntityRef | CharRef
2727 *
2728 * [69] PEReference ::= '%' Name ';'
2729 *
2730 * Returns A newly allocated string with the substitution done. The caller
2731 * must deallocate it !
2732 */
2733xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002734xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2735 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002736#if 0
2737 xmlChar *buffer = NULL;
2738 unsigned int buffer_size = 0;
2739 unsigned int nbchars = 0;
2740
2741 xmlChar *current = NULL;
2742 xmlEntityPtr ent;
2743 unsigned int max = (unsigned int) len;
2744 int c,l;
2745#endif
2746
2747 static int deprecated = 0;
2748 if (!deprecated) {
2749 xmlGenericError(xmlGenericErrorContext,
2750 "xmlDecodeEntities() deprecated function reached\n");
2751 deprecated = 1;
2752 }
2753
2754#if 0
2755 if (ctxt->depth > 40) {
2756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2757 ctxt->sax->error(ctxt->userData,
2758 "Detected entity reference loop\n");
2759 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002760 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002761 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2762 return(NULL);
2763 }
2764
2765 /*
2766 * allocate a translation buffer.
2767 */
2768 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2769 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2770 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002771 xmlGenericError(xmlGenericErrorContext,
2772 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002773 return(NULL);
2774 }
2775
2776 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002777 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002778 */
2779 GROW;
2780 c = CUR_CHAR(l);
2781 while ((nbchars < max) && (c != end) && /* NOTUSED */
2782 (c != end2) && (c != end3)) {
2783 GROW;
2784 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002785 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002786 int val = xmlParseCharRef(ctxt);
2787 COPY_BUF(0,buffer,nbchars,val);
2788 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002789 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002790 (what & XML_SUBSTITUTE_REF)) {
2791 if (xmlParserDebugEntities)
2792 xmlGenericError(xmlGenericErrorContext,
2793 "decoding Entity Reference\n");
2794 ent = xmlParseEntityRef(ctxt);
2795 if ((ent != NULL) &&
2796 (ctxt->replaceEntities != 0)) {
2797 current = ent->content;
2798 while (*current != 0) { /* non input consuming loop */
2799 buffer[nbchars++] = *current++;
2800 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2801 growBuffer(buffer);
2802 }
2803 }
2804 } else if (ent != NULL) {
2805 const xmlChar *cur = ent->name;
2806
2807 buffer[nbchars++] = '&';
2808 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2809 growBuffer(buffer);
2810 }
2811 while (*cur != 0) { /* non input consuming loop */
2812 buffer[nbchars++] = *cur++;
2813 }
2814 buffer[nbchars++] = ';';
2815 }
2816 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2817 /*
2818 * a PEReference induce to switch the entity flow,
2819 * we break here to flush the current set of chars
2820 * parsed if any. We will be called back later.
2821 */
2822 if (xmlParserDebugEntities)
2823 xmlGenericError(xmlGenericErrorContext,
2824 "decoding PE Reference\n");
2825 if (nbchars != 0) break;
2826
2827 xmlParsePEReference(ctxt);
2828
2829 /*
2830 * Pop-up of finished entities.
2831 */
2832 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2833 xmlPopInput(ctxt);
2834
2835 break;
2836 } else {
2837 COPY_BUF(l,buffer,nbchars,c);
2838 NEXTL(l);
2839 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2840 growBuffer(buffer);
2841 }
2842 }
2843 c = CUR_CHAR(l);
2844 }
2845 buffer[nbchars++] = 0;
2846 return(buffer);
2847#endif
2848 return(NULL);
2849}
2850
2851/**
2852 * xmlNamespaceParseNCName:
2853 * @ctxt: an XML parser context
2854 *
2855 * parse an XML namespace name.
2856 *
2857 * TODO: this seems not in use anymore, the namespace handling is done on
2858 * top of the SAX interfaces, i.e. not on raw input.
2859 *
2860 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2861 *
2862 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2863 * CombiningChar | Extender
2864 *
2865 * Returns the namespace name or NULL
2866 */
2867
2868xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002869xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002870#if 0
2871 xmlChar buf[XML_MAX_NAMELEN + 5];
2872 int len = 0, l;
2873 int cur = CUR_CHAR(l);
2874#endif
2875
2876 static int deprecated = 0;
2877 if (!deprecated) {
2878 xmlGenericError(xmlGenericErrorContext,
2879 "xmlNamespaceParseNCName() deprecated function reached\n");
2880 deprecated = 1;
2881 }
2882
2883#if 0
2884 /* load first the value of the char !!! */
2885 GROW;
2886 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2887
2888xmlGenericError(xmlGenericErrorContext,
2889 "xmlNamespaceParseNCName: reached loop 3\n");
2890 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2891 (cur == '.') || (cur == '-') ||
2892 (cur == '_') ||
2893 (IS_COMBINING(cur)) ||
2894 (IS_EXTENDER(cur))) {
2895 COPY_BUF(l,buf,len,cur);
2896 NEXTL(l);
2897 cur = CUR_CHAR(l);
2898 if (len >= XML_MAX_NAMELEN) {
2899 xmlGenericError(xmlGenericErrorContext,
2900 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2901 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2902 (cur == '.') || (cur == '-') ||
2903 (cur == '_') ||
2904 (IS_COMBINING(cur)) ||
2905 (IS_EXTENDER(cur))) {
2906 NEXTL(l);
2907 cur = CUR_CHAR(l);
2908 }
2909 break;
2910 }
2911 }
2912 return(xmlStrndup(buf, len));
2913#endif
2914 return(NULL);
2915}
2916
2917/**
2918 * xmlNamespaceParseQName:
2919 * @ctxt: an XML parser context
2920 * @prefix: a xmlChar **
2921 *
2922 * TODO: this seems not in use anymore, the namespace handling is done on
2923 * top of the SAX interfaces, i.e. not on raw input.
2924 *
2925 * parse an XML qualified name
2926 *
2927 * [NS 5] QName ::= (Prefix ':')? LocalPart
2928 *
2929 * [NS 6] Prefix ::= NCName
2930 *
2931 * [NS 7] LocalPart ::= NCName
2932 *
2933 * Returns the local part, and prefix is updated
2934 * to get the Prefix if any.
2935 */
2936
2937xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002938xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002939
2940 static int deprecated = 0;
2941 if (!deprecated) {
2942 xmlGenericError(xmlGenericErrorContext,
2943 "xmlNamespaceParseQName() deprecated function reached\n");
2944 deprecated = 1;
2945 }
2946
2947#if 0
2948 xmlChar *ret = NULL;
2949
2950 *prefix = NULL;
2951 ret = xmlNamespaceParseNCName(ctxt);
2952 if (RAW == ':') {
2953 *prefix = ret;
2954 NEXT;
2955 ret = xmlNamespaceParseNCName(ctxt);
2956 }
2957
2958 return(ret);
2959#endif
2960 return(NULL);
2961}
2962
2963/**
2964 * xmlNamespaceParseNSDef:
2965 * @ctxt: an XML parser context
2966 *
2967 * parse a namespace prefix declaration
2968 *
2969 * TODO: this seems not in use anymore, the namespace handling is done on
2970 * top of the SAX interfaces, i.e. not on raw input.
2971 *
2972 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2973 *
2974 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2975 *
2976 * Returns the namespace name
2977 */
2978
2979xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002980xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002981 static int deprecated = 0;
2982 if (!deprecated) {
2983 xmlGenericError(xmlGenericErrorContext,
2984 "xmlNamespaceParseNSDef() deprecated function reached\n");
2985 deprecated = 1;
2986 }
2987 return(NULL);
2988#if 0
2989 xmlChar *name = NULL;
2990
2991 if ((RAW == 'x') && (NXT(1) == 'm') &&
2992 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2993 (NXT(4) == 's')) {
2994 SKIP(5);
2995 if (RAW == ':') {
2996 NEXT;
2997 name = xmlNamespaceParseNCName(ctxt);
2998 }
2999 }
3000 return(name);
3001#endif
3002}
3003
3004/**
3005 * xmlParseQuotedString:
3006 * @ctxt: an XML parser context
3007 *
3008 * Parse and return a string between quotes or doublequotes
3009 *
3010 * TODO: Deprecated, to be removed at next drop of binary compatibility
3011 *
3012 * Returns the string parser or NULL.
3013 */
3014xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003015xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003016 static int deprecated = 0;
3017 if (!deprecated) {
3018 xmlGenericError(xmlGenericErrorContext,
3019 "xmlParseQuotedString() deprecated function reached\n");
3020 deprecated = 1;
3021 }
3022 return(NULL);
3023
3024#if 0
3025 xmlChar *buf = NULL;
3026 int len = 0,l;
3027 int size = XML_PARSER_BUFFER_SIZE;
3028 int c;
3029
3030 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3031 if (buf == NULL) {
3032 xmlGenericError(xmlGenericErrorContext,
3033 "malloc of %d byte failed\n", size);
3034 return(NULL);
3035 }
3036xmlGenericError(xmlGenericErrorContext,
3037 "xmlParseQuotedString: reached loop 4\n");
3038 if (RAW == '"') {
3039 NEXT;
3040 c = CUR_CHAR(l);
3041 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3042 if (len + 5 >= size) {
3043 size *= 2;
3044 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3045 if (buf == NULL) {
3046 xmlGenericError(xmlGenericErrorContext,
3047 "realloc of %d byte failed\n", size);
3048 return(NULL);
3049 }
3050 }
3051 COPY_BUF(l,buf,len,c);
3052 NEXTL(l);
3053 c = CUR_CHAR(l);
3054 }
3055 if (c != '"') {
3056 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3058 ctxt->sax->error(ctxt->userData,
3059 "String not closed \"%.50s\"\n", buf);
3060 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003061 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003062 } else {
3063 NEXT;
3064 }
3065 } else if (RAW == '\''){
3066 NEXT;
3067 c = CUR;
3068 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3069 if (len + 1 >= size) {
3070 size *= 2;
3071 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3072 if (buf == NULL) {
3073 xmlGenericError(xmlGenericErrorContext,
3074 "realloc of %d byte failed\n", size);
3075 return(NULL);
3076 }
3077 }
3078 buf[len++] = c;
3079 NEXT;
3080 c = CUR;
3081 }
3082 if (RAW != '\'') {
3083 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3085 ctxt->sax->error(ctxt->userData,
3086 "String not closed \"%.50s\"\n", buf);
3087 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003088 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003089 } else {
3090 NEXT;
3091 }
3092 }
3093 return(buf);
3094#endif
3095}
3096
3097/**
3098 * xmlParseNamespace:
3099 * @ctxt: an XML parser context
3100 *
3101 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3102 *
3103 * This is what the older xml-name Working Draft specified, a bunch of
3104 * other stuff may still rely on it, so support is still here as
3105 * if it was declared on the root of the Tree:-(
3106 *
3107 * TODO: remove from library
3108 *
3109 * To be removed at next drop of binary compatibility
3110 */
3111
3112void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003113xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003114 static int deprecated = 0;
3115 if (!deprecated) {
3116 xmlGenericError(xmlGenericErrorContext,
3117 "xmlParseNamespace() deprecated function reached\n");
3118 deprecated = 1;
3119 }
3120
3121#if 0
3122 xmlChar *href = NULL;
3123 xmlChar *prefix = NULL;
3124 int garbage = 0;
3125
3126 /*
3127 * We just skipped "namespace" or "xml:namespace"
3128 */
3129 SKIP_BLANKS;
3130
3131xmlGenericError(xmlGenericErrorContext,
3132 "xmlParseNamespace: reached loop 5\n");
3133 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3134 /*
3135 * We can have "ns" or "prefix" attributes
3136 * Old encoding as 'href' or 'AS' attributes is still supported
3137 */
3138 if ((RAW == 'n') && (NXT(1) == 's')) {
3139 garbage = 0;
3140 SKIP(2);
3141 SKIP_BLANKS;
3142
3143 if (RAW != '=') continue;
3144 NEXT;
3145 SKIP_BLANKS;
3146
3147 href = xmlParseQuotedString(ctxt);
3148 SKIP_BLANKS;
3149 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3150 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3151 garbage = 0;
3152 SKIP(4);
3153 SKIP_BLANKS;
3154
3155 if (RAW != '=') continue;
3156 NEXT;
3157 SKIP_BLANKS;
3158
3159 href = xmlParseQuotedString(ctxt);
3160 SKIP_BLANKS;
3161 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3162 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3163 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3164 garbage = 0;
3165 SKIP(6);
3166 SKIP_BLANKS;
3167
3168 if (RAW != '=') continue;
3169 NEXT;
3170 SKIP_BLANKS;
3171
3172 prefix = xmlParseQuotedString(ctxt);
3173 SKIP_BLANKS;
3174 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3175 garbage = 0;
3176 SKIP(2);
3177 SKIP_BLANKS;
3178
3179 if (RAW != '=') continue;
3180 NEXT;
3181 SKIP_BLANKS;
3182
3183 prefix = xmlParseQuotedString(ctxt);
3184 SKIP_BLANKS;
3185 } else if ((RAW == '?') && (NXT(1) == '>')) {
3186 garbage = 0;
3187 NEXT;
3188 } else {
3189 /*
3190 * Found garbage when parsing the namespace
3191 */
3192 if (!garbage) {
3193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3194 ctxt->sax->error(ctxt->userData,
3195 "xmlParseNamespace found garbage\n");
3196 }
3197 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003200 NEXT;
3201 }
3202 }
3203
3204 MOVETO_ENDTAG(CUR_PTR);
3205 NEXT;
3206
3207 /*
3208 * Register the DTD.
3209 if (href != NULL)
3210 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3211 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3212 */
3213
3214 if (prefix != NULL) xmlFree(prefix);
3215 if (href != NULL) xmlFree(href);
3216#endif
3217}
3218
3219/**
3220 * xmlScanName:
3221 * @ctxt: an XML parser context
3222 *
3223 * Trickery: parse an XML name but without consuming the input flow
3224 * Needed for rollback cases. Used only when parsing entities references.
3225 *
3226 * TODO: seems deprecated now, only used in the default part of
3227 * xmlParserHandleReference
3228 *
3229 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3230 * CombiningChar | Extender
3231 *
3232 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3233 *
3234 * [6] Names ::= Name (S Name)*
3235 *
3236 * Returns the Name parsed or NULL
3237 */
3238
3239xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003240xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003241 static int deprecated = 0;
3242 if (!deprecated) {
3243 xmlGenericError(xmlGenericErrorContext,
3244 "xmlScanName() deprecated function reached\n");
3245 deprecated = 1;
3246 }
3247 return(NULL);
3248
3249#if 0
3250 xmlChar buf[XML_MAX_NAMELEN];
3251 int len = 0;
3252
3253 GROW;
3254 if (!IS_LETTER(RAW) && (RAW != '_') &&
3255 (RAW != ':')) {
3256 return(NULL);
3257 }
3258
3259
3260 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3261 (NXT(len) == '.') || (NXT(len) == '-') ||
3262 (NXT(len) == '_') || (NXT(len) == ':') ||
3263 (IS_COMBINING(NXT(len))) ||
3264 (IS_EXTENDER(NXT(len)))) {
3265 GROW;
3266 buf[len] = NXT(len);
3267 len++;
3268 if (len >= XML_MAX_NAMELEN) {
3269 xmlGenericError(xmlGenericErrorContext,
3270 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3271 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3272 (IS_DIGIT(NXT(len))) ||
3273 (NXT(len) == '.') || (NXT(len) == '-') ||
3274 (NXT(len) == '_') || (NXT(len) == ':') ||
3275 (IS_COMBINING(NXT(len))) ||
3276 (IS_EXTENDER(NXT(len))))
3277 len++;
3278 break;
3279 }
3280 }
3281 return(xmlStrndup(buf, len));
3282#endif
3283}
3284
3285/**
3286 * xmlParserHandleReference:
3287 * @ctxt: the parser context
3288 *
3289 * TODO: Remove, now deprecated ... the test is done directly in the
3290 * content parsing
3291 * routines.
3292 *
3293 * [67] Reference ::= EntityRef | CharRef
3294 *
3295 * [68] EntityRef ::= '&' Name ';'
3296 *
3297 * [ WFC: Entity Declared ]
3298 * the Name given in the entity reference must match that in an entity
3299 * declaration, except that well-formed documents need not declare any
3300 * of the following entities: amp, lt, gt, apos, quot.
3301 *
3302 * [ WFC: Parsed Entity ]
3303 * An entity reference must not contain the name of an unparsed entity
3304 *
3305 * [66] CharRef ::= '&#' [0-9]+ ';' |
3306 * '&#x' [0-9a-fA-F]+ ';'
3307 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003308 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003309 * the handling is done accordingly to
3310 * http://www.w3.org/TR/REC-xml#entproc
3311 */
3312void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003313xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003314 static int deprecated = 0;
3315 if (!deprecated) {
3316 xmlGenericError(xmlGenericErrorContext,
3317 "xmlParserHandleReference() deprecated function reached\n");
3318 deprecated = 1;
3319 }
3320
Owen Taylor3473f882001-02-23 17:55:21 +00003321 return;
3322}
3323
3324/**
3325 * xmlHandleEntity:
3326 * @ctxt: an XML parser context
3327 * @entity: an XML entity pointer.
3328 *
3329 * Default handling of defined entities, when should we define a new input
3330 * stream ? When do we just handle that as a set of chars ?
3331 *
3332 * OBSOLETE: to be removed at some point.
3333 */
3334
3335void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003336xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003337 static int deprecated = 0;
3338 if (!deprecated) {
3339 xmlGenericError(xmlGenericErrorContext,
3340 "xmlHandleEntity() deprecated function reached\n");
3341 deprecated = 1;
3342 }
3343
3344#if 0
3345 int len;
3346 xmlParserInputPtr input;
3347
3348 if (entity->content == NULL) {
3349 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3352 entity->name);
3353 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003354 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003355 return;
3356 }
3357 len = xmlStrlen(entity->content);
3358 if (len <= 2) goto handle_as_char;
3359
3360 /*
3361 * Redefine its content as an input stream.
3362 */
3363 input = xmlNewEntityInputStream(ctxt, entity);
3364 xmlPushInput(ctxt, input);
3365 return;
3366
3367handle_as_char:
3368 /*
3369 * Just handle the content as a set of chars.
3370 */
3371 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3372 (ctxt->sax->characters != NULL))
3373 ctxt->sax->characters(ctxt->userData, entity->content, len);
3374#endif
3375}
3376
3377/**
3378 * xmlNewGlobalNs:
3379 * @doc: the document carrying the namespace
3380 * @href: the URI associated
3381 * @prefix: the prefix for the namespace
3382 *
3383 * Creation of a Namespace, the old way using PI and without scoping
3384 * DEPRECATED !!!
3385 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003386 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003387 */
3388xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003389xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3390 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003391 static int deprecated = 0;
3392 if (!deprecated) {
3393 xmlGenericError(xmlGenericErrorContext,
3394 "xmlNewGlobalNs() deprecated function reached\n");
3395 deprecated = 1;
3396 }
3397 return(NULL);
3398#if 0
3399 xmlNodePtr root;
3400
3401 xmlNsPtr cur;
3402
3403 root = xmlDocGetRootElement(doc);
3404 if (root != NULL)
3405 return(xmlNewNs(root, href, prefix));
3406
3407 /*
3408 * if there is no root element yet, create an old Namespace type
3409 * and it will be moved to the root at save time.
3410 */
3411 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3412 if (cur == NULL) {
3413 xmlGenericError(xmlGenericErrorContext,
3414 "xmlNewGlobalNs : malloc failed\n");
3415 return(NULL);
3416 }
3417 memset(cur, 0, sizeof(xmlNs));
3418 cur->type = XML_GLOBAL_NAMESPACE;
3419
3420 if (href != NULL)
3421 cur->href = xmlStrdup(href);
3422 if (prefix != NULL)
3423 cur->prefix = xmlStrdup(prefix);
3424
3425 /*
3426 * Add it at the end to preserve parsing order ...
3427 */
3428 if (doc != NULL) {
3429 if (doc->oldNs == NULL) {
3430 doc->oldNs = cur;
3431 } else {
3432 xmlNsPtr prev = doc->oldNs;
3433
3434 while (prev->next != NULL) prev = prev->next;
3435 prev->next = cur;
3436 }
3437 }
3438
3439 return(NULL);
3440#endif
3441}
3442
3443/**
3444 * xmlUpgradeOldNs:
3445 * @doc: a document pointer
3446 *
3447 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3448 * DEPRECATED
3449 */
3450void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003451xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003452 static int deprecated = 0;
3453 if (!deprecated) {
3454 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003455 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003456 deprecated = 1;
3457 }
3458#if 0
3459 xmlNsPtr cur;
3460
3461 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3462 if (doc->children == NULL) {
3463#ifdef DEBUG_TREE
3464 xmlGenericError(xmlGenericErrorContext,
3465 "xmlUpgradeOldNs: failed no root !\n");
3466#endif
3467 return;
3468 }
3469
3470 cur = doc->oldNs;
3471 while (cur->next != NULL) {
3472 cur->type = XML_LOCAL_NAMESPACE;
3473 cur = cur->next;
3474 }
3475 cur->type = XML_LOCAL_NAMESPACE;
3476 cur->next = doc->children->nsDef;
3477 doc->children->nsDef = doc->oldNs;
3478 doc->oldNs = NULL;
3479#endif
3480}
3481