blob: 3900a0e769780c2306da2ba100830db4b1dcd74b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
Daniel Veillarda53c6882001-07-25 17:18:57 +000058/*
59 * Various global defaults for parsing
60 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Names of the features understood by the feature API, in the order
 * they are handed out by xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * If @len or @result is NULL nothing is copied and only the total
 * number of features is returned.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the
 *         total number of features; *@len is updated with the number of
 *         strings copied. The strings are static and must not be
 *         deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    int total = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
    int wanted, idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* never hand out more entries than the table holds */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }
    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
373/************************************************************************
374 * *
375 * Some functions to avoid too large macros *
376 * *
377 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character except the other control codes below
 * #x20, the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through and are rejected here */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
399
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters
 * of the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
414
/*
 * Lookup table answering the BaseChar question for the code points
 * 0x0000 - 0x00FF; indexed directly by the character value.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Values below 0x0100 are answered from the xmlBaseArray table, the
 * others by comparing against the ranges of the production. The
 * ranges are split in three ascending groups so that only the group
 * which can contain @c is evaluated. Negative values are never
 * base characters and are rejected before the table is indexed.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and 
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* a negative value must not be used as an index in the table */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
653
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The digit ranges of the production are kept in a small table which
 * is scanned for a range containing @c.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 },
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(digitRanges)/sizeof(digitRanges[0]); idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
683
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges of the production are stored in ascending order in a
 * table; the scan stops as soon as a range starting above @c is met.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* { first, last } pairs, sorted by increasing code point */
    static const int combRanges[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },
        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },
        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },
        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A },
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(combRanges)/sizeof(combRanges[0]); idx++) {
        /* sorted table: nothing further can match once we are below */
        if (c < combRanges[idx][0])
            return(0);
        if (c <= combRanges[idx][1])
            return(1);
    }
    return(0);
}
796
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* isolated code points of the production */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
        (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
        (c == 0x0EC6) || (c == 0x3005))
        return(1);

    /* the three contiguous ranges of the production */
    if (((c >= 0x3031) && (c <= 0x3035)) ||
        ((c >= 0x309D) && (c <= 0x309E)) ||
        ((c >= 0x30FC) && (c <= 0x30FE)))
        return(1);

    return(0);
}
821
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three members of the production are accepted. The CJK
 * compatibility range (#xF900-#xFA2D) is not part of production [86]
 * and is therefore rejected.
 *
 * Returns 1 if @c is Ideographic, 0 otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick rejection: no ideographic lives below #x0100 */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           ((c >= 0x3021) && (c <= 0x3029)) ||
           (c == 0x3007));
}
839
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [84] Letter ::= BaseChar | Ideographic
 * using the IS_BASECHAR and IS_IDEOGRAPHIC macros.
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the XML 1.0 production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* ASCII letters and digits */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')))
        return(1);

    /* the allowed white space and punctuation */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-':  case '\'': case '(':  case ')':
        case '+':  case ',':  case '.':  case '/':
        case ':':  case '=':  case '?':  case ';':
        case '!':  case '*':  case '#':  case '@':
        case '$':  case '_':  case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
888/* we need to keep enough input to show errors in context */
889#define LINE_LEN 80
890
891#ifdef DEBUG_INPUT
892#define CHECK_BUFFER(in) check_buffer(in)
893
Daniel Veillard01c13b52002-12-10 15:19:08 +0000894static
Owen Taylor3473f882001-02-23 17:55:21 +0000895void check_buffer(xmlParserInputPtr in) {
896 if (in->base != in->buf->buffer->content) {
897 xmlGenericError(xmlGenericErrorContext,
898 "xmlParserInput: base mismatch problem\n");
899 }
900 if (in->cur < in->base) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: cur < base problem\n");
903 }
904 if (in->cur > in->base + in->buf->buffer->use) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur > base + use problem\n");
907 }
908 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
909 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
910 in->buf->buffer->use, in->buf->buffer->size);
911}
912
913#else
914#define CHECK_BUFFER(in)
915#endif
916
917
918/**
919 * xmlParserInputRead:
920 * @in: an XML parser input
921 * @len: an indicative size for the lookahead
922 *
923 * This function refresh the input for the parser. It doesn't try to
924 * preserve pointers to the input buffer, and discard already read data
925 *
926 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
927 * end of this entity
928 */
929int
930xmlParserInputRead(xmlParserInputPtr in, int len) {
931 int ret;
932 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000933 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000934
935#ifdef DEBUG_INPUT
936 xmlGenericError(xmlGenericErrorContext, "Read\n");
937#endif
938 if (in->buf == NULL) return(-1);
939 if (in->base == NULL) return(-1);
940 if (in->cur == NULL) return(-1);
941 if (in->buf->buffer == NULL) return(-1);
942 if (in->buf->readcallback == NULL) return(-1);
943
944 CHECK_BUFFER(in);
945
946 used = in->cur - in->buf->buffer->content;
947 ret = xmlBufferShrink(in->buf->buffer, used);
948 if (ret > 0) {
949 in->cur -= ret;
950 in->consumed += ret;
951 }
952 ret = xmlParserInputBufferRead(in->buf, len);
953 if (in->base != in->buf->buffer->content) {
954 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000955 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000956 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000957 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000958 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000959 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000960 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000961 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000962
963 CHECK_BUFFER(in);
964
965 return(ret);
966}
967
968/**
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
972 *
973 * This function increase the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keep already read data
975 *
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
977 * end of this entity
978 */
979int
980xmlParserInputGrow(xmlParserInputPtr in, int len) {
981 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000982 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000983
984#ifdef DEBUG_INPUT
985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
986#endif
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
991
992 CHECK_BUFFER(in);
993
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000994 indx = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000996
997 CHECK_BUFFER(in);
998
999 return(0);
1000 }
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1003 else
1004 return(0);
1005
1006 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001007 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001008 * block, but we use it really as an integer to do some
1009 * pointer arithmetic. Insure will raise it as a bug but in
1010 * that specific case, that's not !
1011 */
1012 if (in->base != in->buf->buffer->content) {
1013 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001015 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001016 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001017 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001018 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001019 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001020 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001021
1022 CHECK_BUFFER(in);
1023
1024 return(ret);
1025}
1026
1027/**
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1030 *
1031 * This function removes used input for the parser.
1032 */
1033void
1034xmlParserInputShrink(xmlParserInputPtr in) {
1035 int used;
1036 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001037 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001038
1039#ifdef DEBUG_INPUT
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1041#endif
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1046
1047 CHECK_BUFFER(in);
1048
1049 used = in->cur - in->buf->buffer->content;
1050 /*
1051 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001052 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001053 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001054 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return;
1056 if (used > INPUT_CHUNK) {
1057 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1058 if (ret > 0) {
1059 in->cur -= ret;
1060 in->consumed += ret;
1061 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001062 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001063 }
1064
1065 CHECK_BUFFER(in);
1066
1067 if (in->buf->buffer->use > INPUT_CHUNK) {
1068 return;
1069 }
1070 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1071 if (in->base != in->buf->buffer->content) {
1072 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001073 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001074 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001075 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001076 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001077 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001078 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001079 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001080
1081 CHECK_BUFFER(in);
1082}
1083
1084/************************************************************************
1085 * *
1086 * UTF8 character input and related functions *
1087 * *
1088 ************************************************************************/
1089
1090/**
1091 * xmlNextChar:
1092 * @ctxt: the XML parser context
1093 *
1094 * Skip to the next char input char.
1095 */
1096
1097void
Daniel Veillard77a90a72003-03-22 00:04:05 +00001098xmlNextChar(xmlParserCtxtPtr ctxt)
1099{
Owen Taylor3473f882001-02-23 17:55:21 +00001100 if (ctxt->instate == XML_PARSER_EOF)
Daniel Veillard77a90a72003-03-22 00:04:05 +00001101 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001102
Daniel Veillardfdc91562002-07-01 21:52:03 +00001103 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001104 if ((*ctxt->input->cur == 0) &&
1105 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1106 (ctxt->instate != XML_PARSER_COMMENT)) {
1107 /*
1108 * If we are at the end of the current entity and
1109 * the context allows it, we pop consumed entities
1110 * automatically.
1111 * the auto closing should be blocked in other cases
1112 */
1113 xmlPopInput(ctxt);
1114 } else {
1115 const unsigned char *cur;
1116 unsigned char c;
Owen Taylor3473f882001-02-23 17:55:21 +00001117
Daniel Veillard77a90a72003-03-22 00:04:05 +00001118 /*
1119 * 2.11 End-of-Line Handling
1120 * the literal two-character sequence "#xD#xA" or a standalone
1121 * literal #xD, an XML processor must pass to the application
1122 * the single character #xA.
1123 */
1124 if (*(ctxt->input->cur) == '\n') {
1125 ctxt->input->line++;
1126 ctxt->input->col = 1;
1127 } else
1128 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001129
Daniel Veillard77a90a72003-03-22 00:04:05 +00001130 /*
1131 * We are supposed to handle UTF8, check it's valid
1132 * From rfc2044: encoding of the Unicode values on UTF-8:
1133 *
1134 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1135 * 0000 0000-0000 007F 0xxxxxxx
1136 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1137 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1138 *
1139 * Check for the 0x110000 limit too
1140 */
1141 cur = ctxt->input->cur;
1142
1143 c = *cur;
1144 if (c & 0x80) {
1145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
1152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
1155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
1157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
1160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n",
1182 val);
1183 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1184 ctxt->wellFormed = 0;
1185 if (ctxt->recovery == 0)
1186 ctxt->disableSAX = 1;
1187 }
1188 } else
1189 /* 2-byte code */
1190 ctxt->input->cur += 2;
1191 } else
1192 /* 1-byte code */
1193 ctxt->input->cur++;
1194
1195 ctxt->nbChars++;
1196 if (*ctxt->input->cur == 0)
1197 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1198 }
Owen Taylor3473f882001-02-23 17:55:21 +00001199 } else {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001200 /*
1201 * Assume it's a fixed length encoding (1) with
1202 * a compatible encoding for the ASCII set, since
1203 * XML constructs only use < 128 chars
1204 */
1205
1206 if (*(ctxt->input->cur) == '\n') {
1207 ctxt->input->line++;
1208 ctxt->input->col = 1;
1209 } else
1210 ctxt->input->col++;
1211 ctxt->input->cur++;
1212 ctxt->nbChars++;
1213 if (*ctxt->input->cur == 0)
1214 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001215 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001216 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001217 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001218 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001219 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001220 xmlPopInput(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001221 return;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001222 encoding_error:
Owen Taylor3473f882001-02-23 17:55:21 +00001223 /*
1224 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001225 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001226 * declaration header. Report the error and switch the encoding
1227 * to ISO-Latin-1 (if you don't like this policy, just declare the
1228 * encoding !)
1229 */
1230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001231 ctxt->sax->error(ctxt->userData,
1232 "Input is not proper UTF-8, indicate encoding !\n");
1233 ctxt->sax->error(ctxt->userData,
1234 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1235 ctxt->input->cur[0], ctxt->input->cur[1],
1236 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001237 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001238 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001239 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1240
Daniel Veillard77a90a72003-03-22 00:04:05 +00001241 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001242 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001243 return;
1244}
1245
1246/**
1247 * xmlCurrentChar:
1248 * @ctxt: the XML parser context
1249 * @len: pointer to the length of the char read
1250 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001251 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001252 * bytes in the input buffer. Implement the end of line normalization:
1253 * 2.11 End-of-Line Handling
1254 * Wherever an external parsed entity or the literal entity value
1255 * of an internal parsed entity contains either the literal two-character
1256 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1257 * must pass to the application the single character #xA.
1258 * This behavior can conveniently be produced by normalizing all
1259 * line breaks to #xA on input, before parsing.)
1260 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001261 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001262 */
1263
1264int
1265xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1266 if (ctxt->instate == XML_PARSER_EOF)
1267 return(0);
1268
Daniel Veillard561b7f82002-03-20 21:55:57 +00001269 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1270 *len = 1;
1271 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001272 }
1273 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1274 /*
1275 * We are supposed to handle UTF8, check it's valid
1276 * From rfc2044: encoding of the Unicode values on UTF-8:
1277 *
1278 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1279 * 0000 0000-0000 007F 0xxxxxxx
1280 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1281 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1282 *
1283 * Check for the 0x110000 limit too
1284 */
1285 const unsigned char *cur = ctxt->input->cur;
1286 unsigned char c;
1287 unsigned int val;
1288
1289 c = *cur;
1290 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001291 if (cur[1] == 0)
1292 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1293 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001294 goto encoding_error;
1295 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001296
1297 if (cur[2] == 0)
1298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1299 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001300 goto encoding_error;
1301 if ((c & 0xf0) == 0xf0) {
1302 if (cur[3] == 0)
1303 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001304 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001305 ((cur[3] & 0xc0) != 0x80))
1306 goto encoding_error;
1307 /* 4-byte code */
1308 *len = 4;
1309 val = (cur[0] & 0x7) << 18;
1310 val |= (cur[1] & 0x3f) << 12;
1311 val |= (cur[2] & 0x3f) << 6;
1312 val |= cur[3] & 0x3f;
1313 } else {
1314 /* 3-byte code */
1315 *len = 3;
1316 val = (cur[0] & 0xf) << 12;
1317 val |= (cur[1] & 0x3f) << 6;
1318 val |= cur[2] & 0x3f;
1319 }
1320 } else {
1321 /* 2-byte code */
1322 *len = 2;
1323 val = (cur[0] & 0x1f) << 6;
1324 val |= cur[1] & 0x3f;
1325 }
1326 if (!IS_CHAR(val)) {
1327 if ((ctxt->sax != NULL) &&
1328 (ctxt->sax->error != NULL))
1329 ctxt->sax->error(ctxt->userData,
1330 "Char 0x%X out of allowed range\n", val);
1331 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1332 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001334 }
1335 return(val);
1336 } else {
1337 /* 1-byte code */
1338 *len = 1;
1339 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001340 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001341 ctxt->nbChars++;
1342 ctxt->input->cur++;
1343 }
1344 return(0xA);
1345 }
1346 return((int) *ctxt->input->cur);
1347 }
1348 }
1349 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001350 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001351 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001352 * XML constructs only use < 128 chars
1353 */
1354 *len = 1;
1355 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001356 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001357 ctxt->nbChars++;
1358 ctxt->input->cur++;
1359 }
1360 return(0xA);
1361 }
1362 return((int) *ctxt->input->cur);
1363encoding_error:
1364 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001365 * An encoding problem may arise from a truncated input buffer
1366 * splitting a character in the middle. In that case do not raise
1367 * an error but return 0 to endicate an end of stream problem
1368 */
1369 if (ctxt->input->end - ctxt->input->cur < 4) {
1370 *len = 0;
1371 return(0);
1372 }
1373
1374 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001375 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001376 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001377 * declaration header. Report the error and switch the encoding
1378 * to ISO-Latin-1 (if you don't like this policy, just declare the
1379 * encoding !)
1380 */
1381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1382 ctxt->sax->error(ctxt->userData,
1383 "Input is not proper UTF-8, indicate encoding !\n");
1384 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001385 ctxt->input->cur[0], ctxt->input->cur[1],
1386 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001387 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001388 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001389 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1390
1391 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1392 *len = 1;
1393 return((int) *ctxt->input->cur);
1394}
1395
1396/**
1397 * xmlStringCurrentChar:
1398 * @ctxt: the XML parser context
1399 * @cur: pointer to the beginning of the char
1400 * @len: pointer to the length of the char read
1401 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001402 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001403 * bytes in the input buffer.
1404 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001405 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001406 */
1407
1408int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001409xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1410{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001411 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001412 /*
1413 * We are supposed to handle UTF8, check it's valid
1414 * From rfc2044: encoding of the Unicode values on UTF-8:
1415 *
1416 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1417 * 0000 0000-0000 007F 0xxxxxxx
1418 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1419 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1420 *
1421 * Check for the 0x110000 limit too
1422 */
1423 unsigned char c;
1424 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001425
Daniel Veillardd8224e02002-01-13 15:43:22 +00001426 c = *cur;
1427 if (c & 0x80) {
1428 if ((cur[1] & 0xc0) != 0x80)
1429 goto encoding_error;
1430 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001431
Daniel Veillardd8224e02002-01-13 15:43:22 +00001432 if ((cur[2] & 0xc0) != 0x80)
1433 goto encoding_error;
1434 if ((c & 0xf0) == 0xf0) {
1435 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1436 goto encoding_error;
1437 /* 4-byte code */
1438 *len = 4;
1439 val = (cur[0] & 0x7) << 18;
1440 val |= (cur[1] & 0x3f) << 12;
1441 val |= (cur[2] & 0x3f) << 6;
1442 val |= cur[3] & 0x3f;
1443 } else {
1444 /* 3-byte code */
1445 *len = 3;
1446 val = (cur[0] & 0xf) << 12;
1447 val |= (cur[1] & 0x3f) << 6;
1448 val |= cur[2] & 0x3f;
1449 }
1450 } else {
1451 /* 2-byte code */
1452 *len = 2;
1453 val = (cur[0] & 0x1f) << 6;
1454 val |= cur[1] & 0x3f;
1455 }
1456 if (!IS_CHAR(val)) {
1457 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1458 (ctxt->sax->error != NULL))
1459 ctxt->sax->error(ctxt->userData,
1460 "Char 0x%X out of allowed range\n",
1461 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001462 if (ctxt != NULL) {
1463 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1464 ctxt->wellFormed = 0;
1465 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1466 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001467 }
1468 return (val);
1469 } else {
1470 /* 1-byte code */
1471 *len = 1;
1472 return ((int) *cur);
1473 }
Owen Taylor3473f882001-02-23 17:55:21 +00001474 }
1475 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001476 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001477 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * XML constructs only use < 128 chars
1479 */
1480 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001481 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001482encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001483
Owen Taylor3473f882001-02-23 17:55:21 +00001484 /*
1485 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001486 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001487 * declaration header. Report the error and switch the encoding
1488 * to ISO-Latin-1 (if you don't like this policy, just declare the
1489 * encoding !)
1490 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001491 if (ctxt != NULL) {
1492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1493 ctxt->sax->error(ctxt->userData,
1494 "Input is not proper UTF-8, indicate encoding !\n");
1495 ctxt->sax->error(ctxt->userData,
1496 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1497 ctxt->input->cur[0], ctxt->input->cur[1],
1498 ctxt->input->cur[2], ctxt->input->cur[3]);
1499 }
1500 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001501 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001502 }
Owen Taylor3473f882001-02-23 17:55:21 +00001503
1504 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001505 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001506}
1507
1508/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001509 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001510 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001511 * @val: the char value
1512 *
1513 * append the char value in the array
1514 *
1515 * Returns the number of xmlChar written
1516 */
Owen Taylor3473f882001-02-23 17:55:21 +00001517int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001518xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001519 /*
1520 * We are supposed to handle UTF8, check it's valid
1521 * From rfc2044: encoding of the Unicode values on UTF-8:
1522 *
1523 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1524 * 0000 0000-0000 007F 0xxxxxxx
1525 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1526 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1527 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 if (val >= 0x80) {
1529 xmlChar *savedout = out;
1530 int bits;
1531 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1532 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1533 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1534 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001535 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001536 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001537 val);
1538 return(0);
1539 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001540 for ( ; bits >= 0; bits-= 6)
1541 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1542 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001543 }
1544 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001545 return 1;
1546}
1547
1548/**
1549 * xmlCopyChar:
1550 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001551 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001552 * @val: the char value
1553 *
1554 * append the char value in the array
1555 *
1556 * Returns the number of xmlChar written
1557 */
1558
1559int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001560xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001561 /* the len parameter is ignored */
1562 if (val >= 0x80) {
1563 return(xmlCopyCharMultiByte (out, val));
1564 }
1565 *out = (xmlChar) val;
1566 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001567}
1568
1569/************************************************************************
1570 * *
1571 * Commodity functions to switch encodings *
1572 * *
1573 ************************************************************************/
1574
1575/**
1576 * xmlSwitchEncoding:
1577 * @ctxt: the parser context
1578 * @enc: the encoding value (number)
1579 *
1580 * change the input functions when discovering the character encoding
1581 * of a given entity.
1582 *
1583 * Returns 0 in case of success, -1 otherwise
1584 */
1585int
1586xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1587{
1588 xmlCharEncodingHandlerPtr handler;
1589
1590 switch (enc) {
1591 case XML_CHAR_ENCODING_ERROR:
1592 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1594 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001597 break;
1598 case XML_CHAR_ENCODING_NONE:
1599 /* let's assume it's UTF-8 without the XML decl */
1600 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1601 return(0);
1602 case XML_CHAR_ENCODING_UTF8:
1603 /* default encoding, no conversion should be needed */
1604 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001605
1606 /*
1607 * Errata on XML-1.0 June 20 2001
1608 * Specific handling of the Byte Order Mark for
1609 * UTF-8
1610 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001611 if ((ctxt->input != NULL) &&
1612 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001613 (ctxt->input->cur[1] == 0xBB) &&
1614 (ctxt->input->cur[2] == 0xBF)) {
1615 ctxt->input->cur += 3;
1616 }
Owen Taylor3473f882001-02-23 17:55:21 +00001617 return(0);
1618 default:
1619 break;
1620 }
1621 handler = xmlGetCharEncodingHandler(enc);
1622 if (handler == NULL) {
1623 /*
1624 * Default handlers.
1625 */
1626 switch (enc) {
1627 case XML_CHAR_ENCODING_ERROR:
1628 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1630 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001633 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1634 break;
1635 case XML_CHAR_ENCODING_NONE:
1636 /* let's assume it's UTF-8 without the XML decl */
1637 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1638 return(0);
1639 case XML_CHAR_ENCODING_UTF8:
1640 case XML_CHAR_ENCODING_ASCII:
1641 /* default encoding, no conversion should be needed */
1642 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1643 return(0);
1644 case XML_CHAR_ENCODING_UTF16LE:
1645 break;
1646 case XML_CHAR_ENCODING_UTF16BE:
1647 break;
1648 case XML_CHAR_ENCODING_UCS4LE:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding USC4 little endian not supported\n");
1653 break;
1654 case XML_CHAR_ENCODING_UCS4BE:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding USC4 big endian not supported\n");
1659 break;
1660 case XML_CHAR_ENCODING_EBCDIC:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding EBCDIC not supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_UCS4_2143:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding UCS4 2143 not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_UCS4_3412:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding UCS4 3412 not supported\n");
1677 break;
1678 case XML_CHAR_ENCODING_UCS2:
1679 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1681 ctxt->sax->error(ctxt->userData,
1682 "char encoding UCS2 not supported\n");
1683 break;
1684 case XML_CHAR_ENCODING_8859_1:
1685 case XML_CHAR_ENCODING_8859_2:
1686 case XML_CHAR_ENCODING_8859_3:
1687 case XML_CHAR_ENCODING_8859_4:
1688 case XML_CHAR_ENCODING_8859_5:
1689 case XML_CHAR_ENCODING_8859_6:
1690 case XML_CHAR_ENCODING_8859_7:
1691 case XML_CHAR_ENCODING_8859_8:
1692 case XML_CHAR_ENCODING_8859_9:
1693 /*
1694 * We used to keep the internal content in the
1695 * document encoding however this turns being unmaintainable
1696 * So xmlGetCharEncodingHandler() will return non-null
1697 * values for this now.
1698 */
1699 if ((ctxt->inputNr == 1) &&
1700 (ctxt->encoding == NULL) &&
1701 (ctxt->input->encoding != NULL)) {
1702 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1703 }
1704 ctxt->charset = enc;
1705 return(0);
1706 case XML_CHAR_ENCODING_2022_JP:
1707 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
1710 "char encoding ISO-2022-JPnot supported\n");
1711 break;
1712 case XML_CHAR_ENCODING_SHIFT_JIS:
1713 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1715 ctxt->sax->error(ctxt->userData,
1716 "char encoding Shift_JIS not supported\n");
1717 break;
1718 case XML_CHAR_ENCODING_EUC_JP:
1719 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1721 ctxt->sax->error(ctxt->userData,
1722 "char encoding EUC-JPnot supported\n");
1723 break;
1724 }
1725 }
1726 if (handler == NULL)
1727 return(-1);
1728 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1729 return(xmlSwitchToEncoding(ctxt, handler));
1730}
1731
1732/**
1733 * xmlSwitchToEncoding:
1734 * @ctxt: the parser context
1735 * @handler: the encoding handler
1736 *
1737 * change the input functions when discovering the character encoding
1738 * of a given entity.
1739 *
1740 * Returns 0 in case of success, -1 otherwise
1741 */
1742int
1743xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1744{
1745 int nbchars;
1746
1747 if (handler != NULL) {
1748 if (ctxt->input != NULL) {
1749 if (ctxt->input->buf != NULL) {
1750 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001751 /*
1752 * Check in case the auto encoding detetection triggered
1753 * in already.
1754 */
Owen Taylor3473f882001-02-23 17:55:21 +00001755 if (ctxt->input->buf->encoder == handler)
1756 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001757
1758 /*
1759 * "UTF-16" can be used for both LE and BE
1760 */
1761 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1762 BAD_CAST "UTF-16", 6)) &&
1763 (!xmlStrncmp(BAD_CAST handler->name,
1764 BAD_CAST "UTF-16", 6))) {
1765 return(0);
1766 }
1767
Owen Taylor3473f882001-02-23 17:55:21 +00001768 /*
1769 * Note: this is a bit dangerous, but that's what it
1770 * takes to use nearly compatible signature for different
1771 * encodings.
1772 */
1773 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1774 ctxt->input->buf->encoder = handler;
1775 return(0);
1776 }
1777 ctxt->input->buf->encoder = handler;
1778
1779 /*
1780 * Is there already some content down the pipe to convert ?
1781 */
1782 if ((ctxt->input->buf->buffer != NULL) &&
1783 (ctxt->input->buf->buffer->use > 0)) {
1784 int processed;
1785
1786 /*
1787 * Specific handling of the Byte Order Mark for
1788 * UTF-16
1789 */
1790 if ((handler->name != NULL) &&
1791 (!strcmp(handler->name, "UTF-16LE")) &&
1792 (ctxt->input->cur[0] == 0xFF) &&
1793 (ctxt->input->cur[1] == 0xFE)) {
1794 ctxt->input->cur += 2;
1795 }
1796 if ((handler->name != NULL) &&
1797 (!strcmp(handler->name, "UTF-16BE")) &&
1798 (ctxt->input->cur[0] == 0xFE) &&
1799 (ctxt->input->cur[1] == 0xFF)) {
1800 ctxt->input->cur += 2;
1801 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001802 /*
1803 * Errata on XML-1.0 June 20 2001
1804 * Specific handling of the Byte Order Mark for
1805 * UTF-8
1806 */
1807 if ((handler->name != NULL) &&
1808 (!strcmp(handler->name, "UTF-8")) &&
1809 (ctxt->input->cur[0] == 0xEF) &&
1810 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001811 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001812 ctxt->input->cur += 3;
1813 }
Owen Taylor3473f882001-02-23 17:55:21 +00001814
1815 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001816 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001817 * Move it as the raw buffer and create a new input buffer
1818 */
1819 processed = ctxt->input->cur - ctxt->input->base;
1820 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1821 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1822 ctxt->input->buf->buffer = xmlBufferCreate();
1823
1824 if (ctxt->html) {
1825 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001826 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001827 */
1828 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1829 ctxt->input->buf->buffer,
1830 ctxt->input->buf->raw);
1831 } else {
1832 /*
1833 * convert just enough to get
1834 * '<?xml version="1.0" encoding="xxx"?>'
1835 * parsed with the autodetected encoding
1836 * into the parser reading buffer.
1837 */
1838 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1839 ctxt->input->buf->buffer,
1840 ctxt->input->buf->raw);
1841 }
1842 if (nbchars < 0) {
1843 xmlGenericError(xmlGenericErrorContext,
1844 "xmlSwitchToEncoding: encoder error\n");
1845 return(-1);
1846 }
1847 ctxt->input->base =
1848 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001849 ctxt->input->end =
1850 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001851
1852 }
1853 return(0);
1854 } else {
1855 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1856 /*
1857 * When parsing a static memory array one must know the
1858 * size to be able to convert the buffer.
1859 */
1860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1861 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001862 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001863 return(-1);
1864 } else {
1865 int processed;
1866
1867 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001868 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001869 * Move it as the raw buffer and create a new input buffer
1870 */
1871 processed = ctxt->input->cur - ctxt->input->base;
1872
1873 ctxt->input->buf->raw = xmlBufferCreate();
1874 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1875 ctxt->input->length - processed);
1876 ctxt->input->buf->buffer = xmlBufferCreate();
1877
1878 /*
1879 * convert as much as possible of the raw input
1880 * to the parser reading buffer.
1881 */
1882 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1883 ctxt->input->buf->buffer,
1884 ctxt->input->buf->raw);
1885 if (nbchars < 0) {
1886 xmlGenericError(xmlGenericErrorContext,
1887 "xmlSwitchToEncoding: encoder error\n");
1888 return(-1);
1889 }
1890
1891 /*
1892 * Conversion succeeded, get rid of the old buffer
1893 */
1894 if ((ctxt->input->free != NULL) &&
1895 (ctxt->input->base != NULL))
1896 ctxt->input->free((xmlChar *) ctxt->input->base);
1897 ctxt->input->base =
1898 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001899 ctxt->input->end =
1900 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001901 }
1902 }
1903 } else {
1904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1905 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001906 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001907 return(-1);
1908 }
1909 /*
1910 * The parsing is now done in UTF8 natively
1911 */
1912 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1913 } else
1914 return(-1);
1915 return(0);
1916
1917}
1918
1919/************************************************************************
1920 * *
1921 * Commodity functions to handle entities processing *
1922 * *
1923 ************************************************************************/
1924
1925/**
1926 * xmlFreeInputStream:
1927 * @input: an xmlParserInputPtr
1928 *
1929 * Free up an input stream.
1930 */
1931void
1932xmlFreeInputStream(xmlParserInputPtr input) {
1933 if (input == NULL) return;
1934
1935 if (input->filename != NULL) xmlFree((char *) input->filename);
1936 if (input->directory != NULL) xmlFree((char *) input->directory);
1937 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1938 if (input->version != NULL) xmlFree((char *) input->version);
1939 if ((input->free != NULL) && (input->base != NULL))
1940 input->free((xmlChar *) input->base);
1941 if (input->buf != NULL)
1942 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001943 xmlFree(input);
1944}
1945
1946/**
1947 * xmlNewInputStream:
1948 * @ctxt: an XML parser context
1949 *
1950 * Create a new input stream structure
1951 * Returns the new input stream or NULL
1952 */
1953xmlParserInputPtr
1954xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1955 xmlParserInputPtr input;
1956
1957 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1958 if (input == NULL) {
1959 if (ctxt != NULL) {
1960 ctxt->errNo = XML_ERR_NO_MEMORY;
1961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1962 ctxt->sax->error(ctxt->userData,
1963 "malloc: couldn't allocate a new input stream\n");
1964 ctxt->errNo = XML_ERR_NO_MEMORY;
1965 }
1966 return(NULL);
1967 }
1968 memset(input, 0, sizeof(xmlParserInput));
1969 input->line = 1;
1970 input->col = 1;
1971 input->standalone = -1;
1972 return(input);
1973}
1974
1975/**
1976 * xmlNewIOInputStream:
1977 * @ctxt: an XML parser context
1978 * @input: an I/O Input
1979 * @enc: the charset encoding if known
1980 *
1981 * Create a new input stream structure encapsulating the @input into
1982 * a stream suitable for the parser.
1983 *
1984 * Returns the new input stream or NULL
1985 */
1986xmlParserInputPtr
1987xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1988 xmlCharEncoding enc) {
1989 xmlParserInputPtr inputStream;
1990
1991 if (xmlParserDebugEntities)
1992 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1993 inputStream = xmlNewInputStream(ctxt);
1994 if (inputStream == NULL) {
1995 return(NULL);
1996 }
1997 inputStream->filename = NULL;
1998 inputStream->buf = input;
1999 inputStream->base = inputStream->buf->buffer->content;
2000 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002001 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002002 if (enc != XML_CHAR_ENCODING_NONE) {
2003 xmlSwitchEncoding(ctxt, enc);
2004 }
2005
2006 return(inputStream);
2007}
2008
2009/**
2010 * xmlNewEntityInputStream:
2011 * @ctxt: an XML parser context
2012 * @entity: an Entity pointer
2013 *
2014 * Create a new input stream based on an xmlEntityPtr
2015 *
2016 * Returns the new input stream or NULL
2017 */
2018xmlParserInputPtr
2019xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2020 xmlParserInputPtr input;
2021
2022 if (entity == NULL) {
2023 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2025 ctxt->sax->error(ctxt->userData,
2026 "internal: xmlNewEntityInputStream entity = NULL\n");
2027 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2028 return(NULL);
2029 }
2030 if (xmlParserDebugEntities)
2031 xmlGenericError(xmlGenericErrorContext,
2032 "new input from entity: %s\n", entity->name);
2033 if (entity->content == NULL) {
2034 switch (entity->etype) {
2035 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2036 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038 ctxt->sax->error(ctxt->userData,
2039 "xmlNewEntityInputStream unparsed entity !\n");
2040 break;
2041 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2042 case XML_EXTERNAL_PARAMETER_ENTITY:
2043 return(xmlLoadExternalEntity((char *) entity->URI,
2044 (char *) entity->ExternalID, ctxt));
2045 case XML_INTERNAL_GENERAL_ENTITY:
2046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2047 ctxt->sax->error(ctxt->userData,
2048 "Internal entity %s without content !\n", entity->name);
2049 break;
2050 case XML_INTERNAL_PARAMETER_ENTITY:
2051 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "Internal parameter entity %s without content !\n", entity->name);
2055 break;
2056 case XML_INTERNAL_PREDEFINED_ENTITY:
2057 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2059 ctxt->sax->error(ctxt->userData,
2060 "Predefined entity %s without content !\n", entity->name);
2061 break;
2062 }
2063 return(NULL);
2064 }
2065 input = xmlNewInputStream(ctxt);
2066 if (input == NULL) {
2067 return(NULL);
2068 }
2069 input->filename = (char *) entity->URI;
2070 input->base = entity->content;
2071 input->cur = entity->content;
2072 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002073 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002074 return(input);
2075}
2076
2077/**
2078 * xmlNewStringInputStream:
2079 * @ctxt: an XML parser context
2080 * @buffer: an memory buffer
2081 *
2082 * Create a new input stream based on a memory buffer.
2083 * Returns the new input stream
2084 */
2085xmlParserInputPtr
2086xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2087 xmlParserInputPtr input;
2088
2089 if (buffer == NULL) {
2090 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2092 ctxt->sax->error(ctxt->userData,
2093 "internal: xmlNewStringInputStream string = NULL\n");
2094 return(NULL);
2095 }
2096 if (xmlParserDebugEntities)
2097 xmlGenericError(xmlGenericErrorContext,
2098 "new fixed input: %.30s\n", buffer);
2099 input = xmlNewInputStream(ctxt);
2100 if (input == NULL) {
2101 return(NULL);
2102 }
2103 input->base = buffer;
2104 input->cur = buffer;
2105 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002106 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002107 return(input);
2108}
2109
2110/**
2111 * xmlNewInputFromFile:
2112 * @ctxt: an XML parser context
2113 * @filename: the filename to use as entity
2114 *
2115 * Create a new input stream based on a file.
2116 *
2117 * Returns the new input stream or NULL in case of error
2118 */
2119xmlParserInputPtr
2120xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2121 xmlParserInputBufferPtr buf;
2122 xmlParserInputPtr inputStream;
2123 char *directory = NULL;
2124 xmlChar *URI = NULL;
2125
2126 if (xmlParserDebugEntities)
2127 xmlGenericError(xmlGenericErrorContext,
2128 "new input from file: %s\n", filename);
2129 if (ctxt == NULL) return(NULL);
2130 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2131 if (buf == NULL)
2132 return(NULL);
2133
2134 URI = xmlStrdup((xmlChar *) filename);
2135 directory = xmlParserGetDirectory((const char *) URI);
2136
2137 inputStream = xmlNewInputStream(ctxt);
2138 if (inputStream == NULL) {
2139 if (directory != NULL) xmlFree((char *) directory);
2140 if (URI != NULL) xmlFree((char *) URI);
2141 return(NULL);
2142 }
2143
2144 inputStream->filename = (const char *) URI;
2145 inputStream->directory = directory;
2146 inputStream->buf = buf;
2147
2148 inputStream->base = inputStream->buf->buffer->content;
2149 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002150 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002151 if ((ctxt->directory == NULL) && (directory != NULL))
2152 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2153 return(inputStream);
2154}
2155
2156/************************************************************************
2157 * *
2158 * Commodity functions to handle parser contexts *
2159 * *
2160 ************************************************************************/
2161
2162/**
2163 * xmlInitParserCtxt:
2164 * @ctxt: an XML parser context
2165 *
2166 * Initialize a parser context
2167 */
2168
2169void
2170xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2171{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002172 if(ctxt==NULL) {
2173 xmlGenericError(xmlGenericErrorContext,
2174 "xmlInitParserCtxt: NULL context given\n");
2175 return;
2176 }
2177
Owen Taylor3473f882001-02-23 17:55:21 +00002178 xmlDefaultSAXHandlerInit();
2179
William M. Brack8b2c7f12002-11-22 05:07:29 +00002180 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2181 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002182 xmlGenericError(xmlGenericErrorContext,
2183 "xmlInitParserCtxt: out of memory\n");
2184 }
2185 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002186 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002187
2188 /* Allocate the Input stack */
2189 ctxt->inputTab = (xmlParserInputPtr *)
2190 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2191 if (ctxt->inputTab == NULL) {
2192 xmlGenericError(xmlGenericErrorContext,
2193 "xmlInitParserCtxt: out of memory\n");
2194 ctxt->inputNr = 0;
2195 ctxt->inputMax = 0;
2196 ctxt->input = NULL;
2197 return;
2198 }
2199 ctxt->inputNr = 0;
2200 ctxt->inputMax = 5;
2201 ctxt->input = NULL;
2202
2203 ctxt->version = NULL;
2204 ctxt->encoding = NULL;
2205 ctxt->standalone = -1;
2206 ctxt->hasExternalSubset = 0;
2207 ctxt->hasPErefs = 0;
2208 ctxt->html = 0;
2209 ctxt->external = 0;
2210 ctxt->instate = XML_PARSER_START;
2211 ctxt->token = 0;
2212 ctxt->directory = NULL;
2213
2214 /* Allocate the Node stack */
2215 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2216 if (ctxt->nodeTab == NULL) {
2217 xmlGenericError(xmlGenericErrorContext,
2218 "xmlInitParserCtxt: out of memory\n");
2219 ctxt->nodeNr = 0;
2220 ctxt->nodeMax = 0;
2221 ctxt->node = NULL;
2222 ctxt->inputNr = 0;
2223 ctxt->inputMax = 0;
2224 ctxt->input = NULL;
2225 return;
2226 }
2227 ctxt->nodeNr = 0;
2228 ctxt->nodeMax = 10;
2229 ctxt->node = NULL;
2230
2231 /* Allocate the Name stack */
2232 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2233 if (ctxt->nameTab == NULL) {
2234 xmlGenericError(xmlGenericErrorContext,
2235 "xmlInitParserCtxt: out of memory\n");
2236 ctxt->nodeNr = 0;
2237 ctxt->nodeMax = 0;
2238 ctxt->node = NULL;
2239 ctxt->inputNr = 0;
2240 ctxt->inputMax = 0;
2241 ctxt->input = NULL;
2242 ctxt->nameNr = 0;
2243 ctxt->nameMax = 0;
2244 ctxt->name = NULL;
2245 return;
2246 }
2247 ctxt->nameNr = 0;
2248 ctxt->nameMax = 10;
2249 ctxt->name = NULL;
2250
2251 /* Allocate the space stack */
2252 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2253 if (ctxt->spaceTab == NULL) {
2254 xmlGenericError(xmlGenericErrorContext,
2255 "xmlInitParserCtxt: out of memory\n");
2256 ctxt->nodeNr = 0;
2257 ctxt->nodeMax = 0;
2258 ctxt->node = NULL;
2259 ctxt->inputNr = 0;
2260 ctxt->inputMax = 0;
2261 ctxt->input = NULL;
2262 ctxt->nameNr = 0;
2263 ctxt->nameMax = 0;
2264 ctxt->name = NULL;
2265 ctxt->spaceNr = 0;
2266 ctxt->spaceMax = 0;
2267 ctxt->space = NULL;
2268 return;
2269 }
2270 ctxt->spaceNr = 1;
2271 ctxt->spaceMax = 10;
2272 ctxt->spaceTab[0] = -1;
2273 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002274 ctxt->userData = ctxt;
2275 ctxt->myDoc = NULL;
2276 ctxt->wellFormed = 1;
2277 ctxt->valid = 1;
2278 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2279 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2280 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002281 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002283 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002284 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002285
Owen Taylor3473f882001-02-23 17:55:21 +00002286 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002287 ctxt->vctxt.error = xmlParserValidityError;
2288 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002290 if (xmlGetWarningsDefaultValue == 0)
2291 ctxt->vctxt.warning = NULL;
2292 else
2293 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002294 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002295 }
2296 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2297 ctxt->record_info = 0;
2298 ctxt->nbChars = 0;
2299 ctxt->checkIndex = 0;
2300 ctxt->inSubset = 0;
2301 ctxt->errNo = XML_ERR_OK;
2302 ctxt->depth = 0;
2303 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002304 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlInitNodeInfoSeq(&ctxt->node_seq);
2306}
2307
2308/**
2309 * xmlFreeParserCtxt:
2310 * @ctxt: an XML parser context
2311 *
2312 * Free all the memory used by a parser context. However the parsed
2313 * document in ctxt->myDoc is not freed.
2314 */
2315
2316void
2317xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2318{
2319 xmlParserInputPtr input;
2320 xmlChar *oldname;
2321
2322 if (ctxt == NULL) return;
2323
2324 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2325 xmlFreeInputStream(input);
2326 }
2327 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2328 xmlFree(oldname);
2329 }
2330 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2331 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2332 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2333 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2334 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2335 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2336 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2337 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2338 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2340 xmlFree(ctxt->sax);
2341 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002342 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002343#ifdef LIBXML_CATALOG_ENABLED
2344 if (ctxt->catalogs != NULL)
2345 xmlCatalogFreeLocal(ctxt->catalogs);
2346#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002347 xmlFree(ctxt);
2348}
2349
2350/**
2351 * xmlNewParserCtxt:
2352 *
2353 * Allocate and initialize a new parser context.
2354 *
2355 * Returns the xmlParserCtxtPtr or NULL
2356 */
2357
2358xmlParserCtxtPtr
2359xmlNewParserCtxt()
2360{
2361 xmlParserCtxtPtr ctxt;
2362
2363 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2364 if (ctxt == NULL) {
2365 xmlGenericError(xmlGenericErrorContext,
2366 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002367 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002368 return(NULL);
2369 }
2370 memset(ctxt, 0, sizeof(xmlParserCtxt));
2371 xmlInitParserCtxt(ctxt);
2372 return(ctxt);
2373}
2374
/************************************************************************
 *                                                                      *
 *              Handling of node information                            *
 *                                                                      *
 ************************************************************************/
2380
2381/**
2382 * xmlClearParserCtxt:
2383 * @ctxt: an XML parser context
2384 *
2385 * Clear (release owned resources) and reinitialize a parser context
2386 */
2387
2388void
2389xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2390{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002391 if (ctxt==NULL)
2392 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002393 xmlClearNodeInfoSeq(&ctxt->node_seq);
2394 xmlInitParserCtxt(ctxt);
2395}
2396
2397/**
2398 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002399 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002400 * @node: an XML node within the tree
2401 *
2402 * Find the parser node info struct for a given node
2403 *
2404 * Returns an xmlParserNodeInfo block pointer or NULL
2405 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002406const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2407 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002408{
2409 unsigned long pos;
2410
2411 /* Find position where node should be at */
2412 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002413 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002414 return &ctx->node_seq.buffer[pos];
2415 else
2416 return NULL;
2417}
2418
2419
2420/**
2421 * xmlInitNodeInfoSeq:
2422 * @seq: a node info sequence pointer
2423 *
2424 * -- Initialize (set to initial state) node info sequence
2425 */
2426void
2427xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2428{
2429 seq->length = 0;
2430 seq->maximum = 0;
2431 seq->buffer = NULL;
2432}
2433
2434/**
2435 * xmlClearNodeInfoSeq:
2436 * @seq: a node info sequence pointer
2437 *
2438 * -- Clear (release memory and reinitialize) node
2439 * info sequence
2440 */
2441void
2442xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2443{
2444 if ( seq->buffer != NULL )
2445 xmlFree(seq->buffer);
2446 xmlInitNodeInfoSeq(seq);
2447}
2448
2449
2450/**
2451 * xmlParserFindNodeInfoIndex:
2452 * @seq: a node info sequence pointer
2453 * @node: an XML node pointer
2454 *
2455 *
2456 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2457 * the given node is or should be at in a sorted sequence
2458 *
2459 * Returns a long indicating the position of the record
2460 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002461unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2462 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002463{
2464 unsigned long upper, lower, middle;
2465 int found = 0;
2466
2467 /* Do a binary search for the key */
2468 lower = 1;
2469 upper = seq->length;
2470 middle = 0;
2471 while ( lower <= upper && !found) {
2472 middle = lower + (upper - lower) / 2;
2473 if ( node == seq->buffer[middle - 1].node )
2474 found = 1;
2475 else if ( node < seq->buffer[middle - 1].node )
2476 upper = middle - 1;
2477 else
2478 lower = middle + 1;
2479 }
2480
2481 /* Return position */
2482 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2483 return middle;
2484 else
2485 return middle - 1;
2486}
2487
2488
2489/**
2490 * xmlParserAddNodeInfo:
2491 * @ctxt: an XML parser context
2492 * @info: a node info sequence pointer
2493 *
2494 * Insert node info record into the sorted sequence
2495 */
2496void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002497xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002498 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002499{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002500 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002501
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002502 /* Find pos and check to see if node is already in the sequence */
2503 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2504 info->node);
2505 if (pos < ctxt->node_seq.length
2506 && ctxt->node_seq.buffer[pos].node == info->node) {
2507 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002508 }
2509
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002510 /* Otherwise, we need to add new node to buffer */
2511 else {
2512 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2513 xmlParserNodeInfo *tmp_buffer;
2514 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002515
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002516 if (ctxt->node_seq.maximum == 0)
2517 ctxt->node_seq.maximum = 2;
2518 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2519 (2 * ctxt->node_seq.maximum));
2520
2521 if (ctxt->node_seq.buffer == NULL)
2522 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2523 else
2524 tmp_buffer =
2525 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2526 byte_size);
2527
2528 if (tmp_buffer == NULL) {
2529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2530 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2531 ctxt->errNo = XML_ERR_NO_MEMORY;
2532 return;
2533 }
2534 ctxt->node_seq.buffer = tmp_buffer;
2535 ctxt->node_seq.maximum *= 2;
2536 }
2537
2538 /* If position is not at end, move elements out of the way */
2539 if (pos != ctxt->node_seq.length) {
2540 unsigned long i;
2541
2542 for (i = ctxt->node_seq.length; i > pos; i--)
2543 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2544 }
2545
2546 /* Copy element and increase length */
2547 ctxt->node_seq.buffer[pos] = *info;
2548 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002549 }
Owen Taylor3473f882001-02-23 17:55:21 +00002550}
2551
/************************************************************************
 *                                                                      *
 *              Default settings                                        *
 *                                                                      *
 ************************************************************************/
2557/**
2558 * xmlPedanticParserDefault:
2559 * @val: int 0 or 1
2560 *
2561 * Set and return the previous value for enabling pedantic warnings.
2562 *
2563 * Returns the last value for 0 for no substitution, 1 for substitution.
2564 */
2565
2566int
2567xmlPedanticParserDefault(int val) {
2568 int old = xmlPedanticParserDefaultValue;
2569
2570 xmlPedanticParserDefaultValue = val;
2571 return(old);
2572}
2573
2574/**
2575 * xmlLineNumbersDefault:
2576 * @val: int 0 or 1
2577 *
2578 * Set and return the previous value for enabling line numbers in elements
2579 * contents. This may break on old application and is turned off by default.
2580 *
2581 * Returns the last value for 0 for no substitution, 1 for substitution.
2582 */
2583
2584int
2585xmlLineNumbersDefault(int val) {
2586 int old = xmlLineNumbersDefaultValue;
2587
2588 xmlLineNumbersDefaultValue = val;
2589 return(old);
2590}
2591
2592/**
2593 * xmlSubstituteEntitiesDefault:
2594 * @val: int 0 or 1
2595 *
2596 * Set and return the previous value for default entity support.
2597 * Initially the parser always keep entity references instead of substituting
2598 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002599 * default parser behavior
2600 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002601 * file basis.
2602 *
2603 * Returns the last value for 0 for no substitution, 1 for substitution.
2604 */
2605
2606int
2607xmlSubstituteEntitiesDefault(int val) {
2608 int old = xmlSubstituteEntitiesDefaultValue;
2609
2610 xmlSubstituteEntitiesDefaultValue = val;
2611 return(old);
2612}
2613
2614/**
2615 * xmlKeepBlanksDefault:
2616 * @val: int 0 or 1
2617 *
2618 * Set and return the previous value for default blanks text nodes support.
2619 * The 1.x version of the parser used an heuristic to try to detect
2620 * ignorable white spaces. As a result the SAX callback was generating
2621 * ignorableWhitespace() callbacks instead of characters() one, and when
2622 * using the DOM output text nodes containing those blanks were not generated.
2623 * The 2.x and later version will switch to the XML standard way and
2624 * ignorableWhitespace() are only generated when running the parser in
2625 * validating mode and when the current element doesn't allow CDATA or
2626 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002627 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002628 * on 1.X libs and to switch back to the old mode for compatibility when
2629 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2630 * by using xmlIsBlankNode() commodity function to detect the "empty"
2631 * nodes generated.
2632 * This value also affect autogeneration of indentation when saving code
2633 * if blanks sections are kept, indentation is not generated.
2634 *
2635 * Returns the last value for 0 for no substitution, 1 for substitution.
2636 */
2637
2638int
2639xmlKeepBlanksDefault(int val) {
2640 int old = xmlKeepBlanksDefaultValue;
2641
2642 xmlKeepBlanksDefaultValue = val;
2643 xmlIndentTreeOutput = !val;
2644 return(old);
2645}
2646
2647/************************************************************************
2648 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002649 * Deprecated functions kept for compatibility *
2650 * *
2651 ************************************************************************/
2652
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002653/**
2654 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002655 * @lang: pointer to the string value
2656 *
2657 * Checks that the value conforms to the LanguageID production:
2658 *
2659 * NOTE: this is somewhat deprecated, those productions were removed from
2660 * the XML Second edition.
2661 *
2662 * [33] LanguageID ::= Langcode ('-' Subcode)*
2663 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2664 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2665 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2666 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2667 * [38] Subcode ::= ([a-z] | [A-Z])+
2668 *
2669 * Returns 1 if correct 0 otherwise
2670 **/
2671int
2672xmlCheckLanguageID(const xmlChar *lang) {
2673 const xmlChar *cur = lang;
2674
2675 if (cur == NULL)
2676 return(0);
2677 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2678 ((cur[0] == 'I') && (cur[1] == '-'))) {
2679 /*
2680 * IANA code
2681 */
2682 cur += 2;
2683 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2684 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2685 cur++;
2686 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2687 ((cur[0] == 'X') && (cur[1] == '-'))) {
2688 /*
2689 * User code
2690 */
2691 cur += 2;
2692 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2693 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2694 cur++;
2695 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2696 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2697 /*
2698 * ISO639
2699 */
2700 cur++;
2701 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2702 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2703 cur++;
2704 else
2705 return(0);
2706 } else
2707 return(0);
2708 while (cur[0] != 0) { /* non input consuming */
2709 if (cur[0] != '-')
2710 return(0);
2711 cur++;
2712 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2713 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2714 cur++;
2715 else
2716 return(0);
2717 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2718 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2719 cur++;
2720 }
2721 return(1);
2722}
2723
2724/**
2725 * xmlDecodeEntities:
2726 * @ctxt: the parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002727 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002728 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
Owen Taylor3473f882001-02-23 17:55:21 +00002729 * @end: an end marker xmlChar, 0 if none
2730 * @end2: an end marker xmlChar, 0 if none
2731 * @end3: an end marker xmlChar, 0 if none
2732 *
2733 * This function is deprecated, we now always process entities content
2734 * through xmlStringDecodeEntities
2735 *
2736 * TODO: remove it in next major release.
2737 *
2738 * [67] Reference ::= EntityRef | CharRef
2739 *
2740 * [69] PEReference ::= '%' Name ';'
2741 *
2742 * Returns A newly allocated string with the substitution done. The caller
2743 * must deallocate it !
2744 */
2745xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002746xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2747 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002748#if 0
2749 xmlChar *buffer = NULL;
2750 unsigned int buffer_size = 0;
2751 unsigned int nbchars = 0;
2752
2753 xmlChar *current = NULL;
2754 xmlEntityPtr ent;
2755 unsigned int max = (unsigned int) len;
2756 int c,l;
2757#endif
2758
2759 static int deprecated = 0;
2760 if (!deprecated) {
2761 xmlGenericError(xmlGenericErrorContext,
2762 "xmlDecodeEntities() deprecated function reached\n");
2763 deprecated = 1;
2764 }
2765
2766#if 0
2767 if (ctxt->depth > 40) {
2768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2769 ctxt->sax->error(ctxt->userData,
2770 "Detected entity reference loop\n");
2771 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002772 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002773 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2774 return(NULL);
2775 }
2776
2777 /*
2778 * allocate a translation buffer.
2779 */
2780 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2781 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2782 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002783 xmlGenericError(xmlGenericErrorContext,
2784 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002785 return(NULL);
2786 }
2787
2788 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002789 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002790 */
2791 GROW;
2792 c = CUR_CHAR(l);
2793 while ((nbchars < max) && (c != end) && /* NOTUSED */
2794 (c != end2) && (c != end3)) {
2795 GROW;
2796 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002797 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002798 int val = xmlParseCharRef(ctxt);
2799 COPY_BUF(0,buffer,nbchars,val);
2800 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002801 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002802 (what & XML_SUBSTITUTE_REF)) {
2803 if (xmlParserDebugEntities)
2804 xmlGenericError(xmlGenericErrorContext,
2805 "decoding Entity Reference\n");
2806 ent = xmlParseEntityRef(ctxt);
2807 if ((ent != NULL) &&
2808 (ctxt->replaceEntities != 0)) {
2809 current = ent->content;
2810 while (*current != 0) { /* non input consuming loop */
2811 buffer[nbchars++] = *current++;
2812 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2813 growBuffer(buffer);
2814 }
2815 }
2816 } else if (ent != NULL) {
2817 const xmlChar *cur = ent->name;
2818
2819 buffer[nbchars++] = '&';
2820 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2821 growBuffer(buffer);
2822 }
2823 while (*cur != 0) { /* non input consuming loop */
2824 buffer[nbchars++] = *cur++;
2825 }
2826 buffer[nbchars++] = ';';
2827 }
2828 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2829 /*
2830 * a PEReference induce to switch the entity flow,
2831 * we break here to flush the current set of chars
2832 * parsed if any. We will be called back later.
2833 */
2834 if (xmlParserDebugEntities)
2835 xmlGenericError(xmlGenericErrorContext,
2836 "decoding PE Reference\n");
2837 if (nbchars != 0) break;
2838
2839 xmlParsePEReference(ctxt);
2840
2841 /*
2842 * Pop-up of finished entities.
2843 */
2844 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2845 xmlPopInput(ctxt);
2846
2847 break;
2848 } else {
2849 COPY_BUF(l,buffer,nbchars,c);
2850 NEXTL(l);
2851 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2852 growBuffer(buffer);
2853 }
2854 }
2855 c = CUR_CHAR(l);
2856 }
2857 buffer[nbchars++] = 0;
2858 return(buffer);
2859#endif
2860 return(NULL);
2861}
2862
2863/**
2864 * xmlNamespaceParseNCName:
2865 * @ctxt: an XML parser context
2866 *
2867 * parse an XML namespace name.
2868 *
2869 * TODO: this seems not in use anymore, the namespace handling is done on
2870 * top of the SAX interfaces, i.e. not on raw input.
2871 *
2872 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2873 *
2874 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2875 * CombiningChar | Extender
2876 *
2877 * Returns the namespace name or NULL
2878 */
2879
2880xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002881xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002882#if 0
2883 xmlChar buf[XML_MAX_NAMELEN + 5];
2884 int len = 0, l;
2885 int cur = CUR_CHAR(l);
2886#endif
2887
2888 static int deprecated = 0;
2889 if (!deprecated) {
2890 xmlGenericError(xmlGenericErrorContext,
2891 "xmlNamespaceParseNCName() deprecated function reached\n");
2892 deprecated = 1;
2893 }
2894
2895#if 0
2896 /* load first the value of the char !!! */
2897 GROW;
2898 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2899
2900xmlGenericError(xmlGenericErrorContext,
2901 "xmlNamespaceParseNCName: reached loop 3\n");
2902 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2903 (cur == '.') || (cur == '-') ||
2904 (cur == '_') ||
2905 (IS_COMBINING(cur)) ||
2906 (IS_EXTENDER(cur))) {
2907 COPY_BUF(l,buf,len,cur);
2908 NEXTL(l);
2909 cur = CUR_CHAR(l);
2910 if (len >= XML_MAX_NAMELEN) {
2911 xmlGenericError(xmlGenericErrorContext,
2912 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2913 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2914 (cur == '.') || (cur == '-') ||
2915 (cur == '_') ||
2916 (IS_COMBINING(cur)) ||
2917 (IS_EXTENDER(cur))) {
2918 NEXTL(l);
2919 cur = CUR_CHAR(l);
2920 }
2921 break;
2922 }
2923 }
2924 return(xmlStrndup(buf, len));
2925#endif
2926 return(NULL);
2927}
2928
2929/**
2930 * xmlNamespaceParseQName:
2931 * @ctxt: an XML parser context
2932 * @prefix: a xmlChar **
2933 *
2934 * TODO: this seems not in use anymore, the namespace handling is done on
2935 * top of the SAX interfaces, i.e. not on raw input.
2936 *
2937 * parse an XML qualified name
2938 *
2939 * [NS 5] QName ::= (Prefix ':')? LocalPart
2940 *
2941 * [NS 6] Prefix ::= NCName
2942 *
2943 * [NS 7] LocalPart ::= NCName
2944 *
2945 * Returns the local part, and prefix is updated
2946 * to get the Prefix if any.
2947 */
2948
2949xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002950xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002951
2952 static int deprecated = 0;
2953 if (!deprecated) {
2954 xmlGenericError(xmlGenericErrorContext,
2955 "xmlNamespaceParseQName() deprecated function reached\n");
2956 deprecated = 1;
2957 }
2958
2959#if 0
2960 xmlChar *ret = NULL;
2961
2962 *prefix = NULL;
2963 ret = xmlNamespaceParseNCName(ctxt);
2964 if (RAW == ':') {
2965 *prefix = ret;
2966 NEXT;
2967 ret = xmlNamespaceParseNCName(ctxt);
2968 }
2969
2970 return(ret);
2971#endif
2972 return(NULL);
2973}
2974
2975/**
2976 * xmlNamespaceParseNSDef:
2977 * @ctxt: an XML parser context
2978 *
2979 * parse a namespace prefix declaration
2980 *
2981 * TODO: this seems not in use anymore, the namespace handling is done on
2982 * top of the SAX interfaces, i.e. not on raw input.
2983 *
2984 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2985 *
2986 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2987 *
2988 * Returns the namespace name
2989 */
2990
2991xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002992xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002993 static int deprecated = 0;
2994 if (!deprecated) {
2995 xmlGenericError(xmlGenericErrorContext,
2996 "xmlNamespaceParseNSDef() deprecated function reached\n");
2997 deprecated = 1;
2998 }
2999 return(NULL);
3000#if 0
3001 xmlChar *name = NULL;
3002
3003 if ((RAW == 'x') && (NXT(1) == 'm') &&
3004 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3005 (NXT(4) == 's')) {
3006 SKIP(5);
3007 if (RAW == ':') {
3008 NEXT;
3009 name = xmlNamespaceParseNCName(ctxt);
3010 }
3011 }
3012 return(name);
3013#endif
3014}
3015
3016/**
3017 * xmlParseQuotedString:
3018 * @ctxt: an XML parser context
3019 *
3020 * Parse and return a string between quotes or doublequotes
3021 *
3022 * TODO: Deprecated, to be removed at next drop of binary compatibility
3023 *
3024 * Returns the string parser or NULL.
3025 */
3026xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003027xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003028 static int deprecated = 0;
3029 if (!deprecated) {
3030 xmlGenericError(xmlGenericErrorContext,
3031 "xmlParseQuotedString() deprecated function reached\n");
3032 deprecated = 1;
3033 }
3034 return(NULL);
3035
3036#if 0
3037 xmlChar *buf = NULL;
3038 int len = 0,l;
3039 int size = XML_PARSER_BUFFER_SIZE;
3040 int c;
3041
3042 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3043 if (buf == NULL) {
3044 xmlGenericError(xmlGenericErrorContext,
3045 "malloc of %d byte failed\n", size);
3046 return(NULL);
3047 }
3048xmlGenericError(xmlGenericErrorContext,
3049 "xmlParseQuotedString: reached loop 4\n");
3050 if (RAW == '"') {
3051 NEXT;
3052 c = CUR_CHAR(l);
3053 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3054 if (len + 5 >= size) {
3055 size *= 2;
3056 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3057 if (buf == NULL) {
3058 xmlGenericError(xmlGenericErrorContext,
3059 "realloc of %d byte failed\n", size);
3060 return(NULL);
3061 }
3062 }
3063 COPY_BUF(l,buf,len,c);
3064 NEXTL(l);
3065 c = CUR_CHAR(l);
3066 }
3067 if (c != '"') {
3068 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3070 ctxt->sax->error(ctxt->userData,
3071 "String not closed \"%.50s\"\n", buf);
3072 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003073 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003074 } else {
3075 NEXT;
3076 }
3077 } else if (RAW == '\''){
3078 NEXT;
3079 c = CUR;
3080 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3081 if (len + 1 >= size) {
3082 size *= 2;
3083 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3084 if (buf == NULL) {
3085 xmlGenericError(xmlGenericErrorContext,
3086 "realloc of %d byte failed\n", size);
3087 return(NULL);
3088 }
3089 }
3090 buf[len++] = c;
3091 NEXT;
3092 c = CUR;
3093 }
3094 if (RAW != '\'') {
3095 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "String not closed \"%.50s\"\n", buf);
3099 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003100 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003101 } else {
3102 NEXT;
3103 }
3104 }
3105 return(buf);
3106#endif
3107}
3108
3109/**
3110 * xmlParseNamespace:
3111 * @ctxt: an XML parser context
3112 *
3113 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3114 *
3115 * This is what the older xml-name Working Draft specified, a bunch of
3116 * other stuff may still rely on it, so support is still here as
3117 * if it was declared on the root of the Tree:-(
3118 *
3119 * TODO: remove from library
3120 *
3121 * To be removed at next drop of binary compatibility
3122 */
3123
3124void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003125xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003126 static int deprecated = 0;
3127 if (!deprecated) {
3128 xmlGenericError(xmlGenericErrorContext,
3129 "xmlParseNamespace() deprecated function reached\n");
3130 deprecated = 1;
3131 }
3132
3133#if 0
3134 xmlChar *href = NULL;
3135 xmlChar *prefix = NULL;
3136 int garbage = 0;
3137
3138 /*
3139 * We just skipped "namespace" or "xml:namespace"
3140 */
3141 SKIP_BLANKS;
3142
3143xmlGenericError(xmlGenericErrorContext,
3144 "xmlParseNamespace: reached loop 5\n");
3145 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3146 /*
3147 * We can have "ns" or "prefix" attributes
3148 * Old encoding as 'href' or 'AS' attributes is still supported
3149 */
3150 if ((RAW == 'n') && (NXT(1) == 's')) {
3151 garbage = 0;
3152 SKIP(2);
3153 SKIP_BLANKS;
3154
3155 if (RAW != '=') continue;
3156 NEXT;
3157 SKIP_BLANKS;
3158
3159 href = xmlParseQuotedString(ctxt);
3160 SKIP_BLANKS;
3161 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3162 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3163 garbage = 0;
3164 SKIP(4);
3165 SKIP_BLANKS;
3166
3167 if (RAW != '=') continue;
3168 NEXT;
3169 SKIP_BLANKS;
3170
3171 href = xmlParseQuotedString(ctxt);
3172 SKIP_BLANKS;
3173 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3174 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3175 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3176 garbage = 0;
3177 SKIP(6);
3178 SKIP_BLANKS;
3179
3180 if (RAW != '=') continue;
3181 NEXT;
3182 SKIP_BLANKS;
3183
3184 prefix = xmlParseQuotedString(ctxt);
3185 SKIP_BLANKS;
3186 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3187 garbage = 0;
3188 SKIP(2);
3189 SKIP_BLANKS;
3190
3191 if (RAW != '=') continue;
3192 NEXT;
3193 SKIP_BLANKS;
3194
3195 prefix = xmlParseQuotedString(ctxt);
3196 SKIP_BLANKS;
3197 } else if ((RAW == '?') && (NXT(1) == '>')) {
3198 garbage = 0;
3199 NEXT;
3200 } else {
3201 /*
3202 * Found garbage when parsing the namespace
3203 */
3204 if (!garbage) {
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "xmlParseNamespace found garbage\n");
3208 }
3209 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3210 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003211 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003212 NEXT;
3213 }
3214 }
3215
3216 MOVETO_ENDTAG(CUR_PTR);
3217 NEXT;
3218
3219 /*
3220 * Register the DTD.
3221 if (href != NULL)
3222 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3223 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3224 */
3225
3226 if (prefix != NULL) xmlFree(prefix);
3227 if (href != NULL) xmlFree(href);
3228#endif
3229}
3230
3231/**
3232 * xmlScanName:
3233 * @ctxt: an XML parser context
3234 *
3235 * Trickery: parse an XML name but without consuming the input flow
3236 * Needed for rollback cases. Used only when parsing entities references.
3237 *
3238 * TODO: seems deprecated now, only used in the default part of
3239 * xmlParserHandleReference
3240 *
3241 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3242 * CombiningChar | Extender
3243 *
3244 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3245 *
3246 * [6] Names ::= Name (S Name)*
3247 *
3248 * Returns the Name parsed or NULL
3249 */
3250
3251xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003252xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003253 static int deprecated = 0;
3254 if (!deprecated) {
3255 xmlGenericError(xmlGenericErrorContext,
3256 "xmlScanName() deprecated function reached\n");
3257 deprecated = 1;
3258 }
3259 return(NULL);
3260
3261#if 0
3262 xmlChar buf[XML_MAX_NAMELEN];
3263 int len = 0;
3264
3265 GROW;
3266 if (!IS_LETTER(RAW) && (RAW != '_') &&
3267 (RAW != ':')) {
3268 return(NULL);
3269 }
3270
3271
3272 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3273 (NXT(len) == '.') || (NXT(len) == '-') ||
3274 (NXT(len) == '_') || (NXT(len) == ':') ||
3275 (IS_COMBINING(NXT(len))) ||
3276 (IS_EXTENDER(NXT(len)))) {
3277 GROW;
3278 buf[len] = NXT(len);
3279 len++;
3280 if (len >= XML_MAX_NAMELEN) {
3281 xmlGenericError(xmlGenericErrorContext,
3282 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3283 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3284 (IS_DIGIT(NXT(len))) ||
3285 (NXT(len) == '.') || (NXT(len) == '-') ||
3286 (NXT(len) == '_') || (NXT(len) == ':') ||
3287 (IS_COMBINING(NXT(len))) ||
3288 (IS_EXTENDER(NXT(len))))
3289 len++;
3290 break;
3291 }
3292 }
3293 return(xmlStrndup(buf, len));
3294#endif
3295}
3296
3297/**
3298 * xmlParserHandleReference:
3299 * @ctxt: the parser context
3300 *
3301 * TODO: Remove, now deprecated ... the test is done directly in the
3302 * content parsing
3303 * routines.
3304 *
3305 * [67] Reference ::= EntityRef | CharRef
3306 *
3307 * [68] EntityRef ::= '&' Name ';'
3308 *
3309 * [ WFC: Entity Declared ]
3310 * the Name given in the entity reference must match that in an entity
3311 * declaration, except that well-formed documents need not declare any
3312 * of the following entities: amp, lt, gt, apos, quot.
3313 *
3314 * [ WFC: Parsed Entity ]
3315 * An entity reference must not contain the name of an unparsed entity
3316 *
3317 * [66] CharRef ::= '&#' [0-9]+ ';' |
3318 * '&#x' [0-9a-fA-F]+ ';'
3319 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003320 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003321 * the handling is done accordingly to
3322 * http://www.w3.org/TR/REC-xml#entproc
3323 */
3324void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003325xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003326 static int deprecated = 0;
3327 if (!deprecated) {
3328 xmlGenericError(xmlGenericErrorContext,
3329 "xmlParserHandleReference() deprecated function reached\n");
3330 deprecated = 1;
3331 }
3332
Owen Taylor3473f882001-02-23 17:55:21 +00003333 return;
3334}
3335
3336/**
3337 * xmlHandleEntity:
3338 * @ctxt: an XML parser context
3339 * @entity: an XML entity pointer.
3340 *
3341 * Default handling of defined entities, when should we define a new input
3342 * stream ? When do we just handle that as a set of chars ?
3343 *
3344 * OBSOLETE: to be removed at some point.
3345 */
3346
3347void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003348xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003349 static int deprecated = 0;
3350 if (!deprecated) {
3351 xmlGenericError(xmlGenericErrorContext,
3352 "xmlHandleEntity() deprecated function reached\n");
3353 deprecated = 1;
3354 }
3355
3356#if 0
3357 int len;
3358 xmlParserInputPtr input;
3359
3360 if (entity->content == NULL) {
3361 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3363 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3364 entity->name);
3365 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003366 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003367 return;
3368 }
3369 len = xmlStrlen(entity->content);
3370 if (len <= 2) goto handle_as_char;
3371
3372 /*
3373 * Redefine its content as an input stream.
3374 */
3375 input = xmlNewEntityInputStream(ctxt, entity);
3376 xmlPushInput(ctxt, input);
3377 return;
3378
3379handle_as_char:
3380 /*
3381 * Just handle the content as a set of chars.
3382 */
3383 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3384 (ctxt->sax->characters != NULL))
3385 ctxt->sax->characters(ctxt->userData, entity->content, len);
3386#endif
3387}
3388
3389/**
3390 * xmlNewGlobalNs:
3391 * @doc: the document carrying the namespace
3392 * @href: the URI associated
3393 * @prefix: the prefix for the namespace
3394 *
3395 * Creation of a Namespace, the old way using PI and without scoping
3396 * DEPRECATED !!!
3397 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003398 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003399 */
3400xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003401xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3402 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003403 static int deprecated = 0;
3404 if (!deprecated) {
3405 xmlGenericError(xmlGenericErrorContext,
3406 "xmlNewGlobalNs() deprecated function reached\n");
3407 deprecated = 1;
3408 }
3409 return(NULL);
3410#if 0
3411 xmlNodePtr root;
3412
3413 xmlNsPtr cur;
3414
3415 root = xmlDocGetRootElement(doc);
3416 if (root != NULL)
3417 return(xmlNewNs(root, href, prefix));
3418
3419 /*
3420 * if there is no root element yet, create an old Namespace type
3421 * and it will be moved to the root at save time.
3422 */
3423 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3424 if (cur == NULL) {
3425 xmlGenericError(xmlGenericErrorContext,
3426 "xmlNewGlobalNs : malloc failed\n");
3427 return(NULL);
3428 }
3429 memset(cur, 0, sizeof(xmlNs));
3430 cur->type = XML_GLOBAL_NAMESPACE;
3431
3432 if (href != NULL)
3433 cur->href = xmlStrdup(href);
3434 if (prefix != NULL)
3435 cur->prefix = xmlStrdup(prefix);
3436
3437 /*
3438 * Add it at the end to preserve parsing order ...
3439 */
3440 if (doc != NULL) {
3441 if (doc->oldNs == NULL) {
3442 doc->oldNs = cur;
3443 } else {
3444 xmlNsPtr prev = doc->oldNs;
3445
3446 while (prev->next != NULL) prev = prev->next;
3447 prev->next = cur;
3448 }
3449 }
3450
3451 return(NULL);
3452#endif
3453}
3454
3455/**
3456 * xmlUpgradeOldNs:
3457 * @doc: a document pointer
3458 *
3459 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3460 * DEPRECATED
3461 */
3462void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003463xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003464 static int deprecated = 0;
3465 if (!deprecated) {
3466 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003467 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003468 deprecated = 1;
3469 }
3470#if 0
3471 xmlNsPtr cur;
3472
3473 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3474 if (doc->children == NULL) {
3475#ifdef DEBUG_TREE
3476 xmlGenericError(xmlGenericErrorContext,
3477 "xmlUpgradeOldNs: failed no root !\n");
3478#endif
3479 return;
3480 }
3481
3482 cur = doc->oldNs;
3483 while (cur->next != NULL) {
3484 cur->type = XML_LOCAL_NAMESPACE;
3485 cur = cur->next;
3486 }
3487 cur->type = XML_LOCAL_NAMESPACE;
3488 cur->next = doc->children->nsDef;
3489 doc->children->nsDef = doc->oldNs;
3490 doc->oldNs = NULL;
3491#endif
3492}
3493