blob: 5a9c8e058f29236a9b3e04763c303b58b3e6a816 [file] [log] [blame]
/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard2fdbd322003-08-18 12:15:38 +000050#include <libxml/dict.h>
Daniel Veillard16698282001-09-14 10:29:27 +000051#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000052#ifdef LIBXML_CATALOG_ENABLED
53#include <libxml/catalog.h>
54#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000055#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000056
Daniel Veillard56a4cb82001-03-24 17:00:36 +000057void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000058
/*
 * Various global defaults for parsing
 */
Owen Taylor3473f882001-02-23 17:55:21 +000062
Daniel Veillard5e2dace2001-07-18 19:30:27 +000063/**
Owen Taylor3473f882001-02-23 17:55:21 +000064 * xmlCheckVersion:
65 * @version: the include version number
66 *
67 * check the compiled lib version against the include one.
68 * This can warn or immediately kill the application
69 */
70void
71xmlCheckVersion(int version) {
72 int myversion = (int) LIBXML_VERSION;
73
Daniel Veillard6f350292001-10-14 09:56:15 +000074 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000075
Owen Taylor3473f882001-02-23 17:55:21 +000076 if ((myversion / 10000) != (version / 10000)) {
77 xmlGenericError(xmlGenericErrorContext,
78 "Fatal: program compiled against libxml %d using libxml %d\n",
79 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000080 fprintf(stderr,
81 "Fatal: program compiled against libxml %d using libxml %d\n",
82 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000083 }
84 if ((myversion / 100) < (version / 100)) {
85 xmlGenericError(xmlGenericErrorContext,
86 "Warning: program compiled against libxml %d using older %d\n",
87 (version / 100), (myversion / 100));
88 }
89}
90
91
/*
 * Names of the parser features advertised by xmlGetFeaturesList().
 * The table is read-only; callers receive pointers into it.
 */
static const char * const xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * If @len or @result is NULL nothing is copied and only the total number
 * of features is returned, which lets a caller size its array first.
 *
 * Returns -1 in case of error (*@len negative, or 1000 and above),
 *            or the total number of features,
 *            @len is updated with the number of strings copied,
 *            strings must not be deallocated
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int i;

    if ((len == NULL) || (result == NULL))
        return(total);
    /* reject requests that cannot be a sane array length */
    if ((*len < 0) || (*len >= 1000))
        return(-1);
    if (*len > total)
        *len = total;
    for (i = 0; i < *len; i++)
        result[i] = xmlFeaturesList[i];
    return(total);
}
163
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000164/**
Owen Taylor3473f882001-02-23 17:55:21 +0000165 * xmlGetFeature:
166 * @ctxt: an XML/HTML parser context
167 * @name: the feature name
168 * @result: location to store the result
169 *
170 * Read the current value of one feature of this parser instance
171 *
172 * Returns -1 in case or error, 0 otherwise
173 */
174int
175xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
176 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
177 return(-1);
178
179 if (!strcmp(name, "validate")) {
180 *((int *) result) = ctxt->validate;
181 } else if (!strcmp(name, "keep blanks")) {
182 *((int *) result) = ctxt->keepBlanks;
183 } else if (!strcmp(name, "disable SAX")) {
184 *((int *) result) = ctxt->disableSAX;
185 } else if (!strcmp(name, "fetch external entities")) {
186 *((int *) result) = ctxt->loadsubset;
187 } else if (!strcmp(name, "substitute entities")) {
188 *((int *) result) = ctxt->replaceEntities;
189 } else if (!strcmp(name, "gather line info")) {
190 *((int *) result) = ctxt->record_info;
191 } else if (!strcmp(name, "user data")) {
192 *((void **)result) = ctxt->userData;
193 } else if (!strcmp(name, "is html")) {
194 *((int *) result) = ctxt->html;
195 } else if (!strcmp(name, "is standalone")) {
196 *((int *) result) = ctxt->standalone;
197 } else if (!strcmp(name, "document")) {
198 *((xmlDocPtr *) result) = ctxt->myDoc;
199 } else if (!strcmp(name, "is well formed")) {
200 *((int *) result) = ctxt->wellFormed;
201 } else if (!strcmp(name, "is valid")) {
202 *((int *) result) = ctxt->valid;
203 } else if (!strcmp(name, "SAX block")) {
204 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
205 } else if (!strcmp(name, "SAX function internalSubset")) {
206 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
207 } else if (!strcmp(name, "SAX function isStandalone")) {
208 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
209 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
210 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
211 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
212 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
213 } else if (!strcmp(name, "SAX function resolveEntity")) {
214 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
215 } else if (!strcmp(name, "SAX function getEntity")) {
216 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
217 } else if (!strcmp(name, "SAX function entityDecl")) {
218 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
219 } else if (!strcmp(name, "SAX function notationDecl")) {
220 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
221 } else if (!strcmp(name, "SAX function attributeDecl")) {
222 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
223 } else if (!strcmp(name, "SAX function elementDecl")) {
224 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
225 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
226 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
227 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
228 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
229 } else if (!strcmp(name, "SAX function startDocument")) {
230 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
231 } else if (!strcmp(name, "SAX function endDocument")) {
232 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
233 } else if (!strcmp(name, "SAX function startElement")) {
234 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
235 } else if (!strcmp(name, "SAX function endElement")) {
236 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
237 } else if (!strcmp(name, "SAX function reference")) {
238 *((referenceSAXFunc *) result) = ctxt->sax->reference;
239 } else if (!strcmp(name, "SAX function characters")) {
240 *((charactersSAXFunc *) result) = ctxt->sax->characters;
241 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
242 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
243 } else if (!strcmp(name, "SAX function processingInstruction")) {
244 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
245 } else if (!strcmp(name, "SAX function comment")) {
246 *((commentSAXFunc *) result) = ctxt->sax->comment;
247 } else if (!strcmp(name, "SAX function warning")) {
248 *((warningSAXFunc *) result) = ctxt->sax->warning;
249 } else if (!strcmp(name, "SAX function error")) {
250 *((errorSAXFunc *) result) = ctxt->sax->error;
251 } else if (!strcmp(name, "SAX function fatalError")) {
252 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
253 } else if (!strcmp(name, "SAX function getParameterEntity")) {
254 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
255 } else if (!strcmp(name, "SAX function cdataBlock")) {
256 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
257 } else if (!strcmp(name, "SAX function externalSubset")) {
258 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
259 } else {
260 return(-1);
261 }
262 return(0);
263}
264
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000265/**
Owen Taylor3473f882001-02-23 17:55:21 +0000266 * xmlSetFeature:
267 * @ctxt: an XML/HTML parser context
268 * @name: the feature name
269 * @value: pointer to the location of the new value
270 *
271 * Change the current value of one feature of this parser instance
272 *
273 * Returns -1 in case or error, 0 otherwise
274 */
275int
276xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
277 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
278 return(-1);
279
280 if (!strcmp(name, "validate")) {
281 int newvalidate = *((int *) value);
282 if ((!ctxt->validate) && (newvalidate != 0)) {
283 if (ctxt->vctxt.warning == NULL)
284 ctxt->vctxt.warning = xmlParserValidityWarning;
285 if (ctxt->vctxt.error == NULL)
286 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000287 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000288 }
289 ctxt->validate = newvalidate;
290 } else if (!strcmp(name, "keep blanks")) {
291 ctxt->keepBlanks = *((int *) value);
292 } else if (!strcmp(name, "disable SAX")) {
293 ctxt->disableSAX = *((int *) value);
294 } else if (!strcmp(name, "fetch external entities")) {
295 ctxt->loadsubset = *((int *) value);
296 } else if (!strcmp(name, "substitute entities")) {
297 ctxt->replaceEntities = *((int *) value);
298 } else if (!strcmp(name, "gather line info")) {
299 ctxt->record_info = *((int *) value);
300 } else if (!strcmp(name, "user data")) {
301 ctxt->userData = *((void **)value);
302 } else if (!strcmp(name, "is html")) {
303 ctxt->html = *((int *) value);
304 } else if (!strcmp(name, "is standalone")) {
305 ctxt->standalone = *((int *) value);
306 } else if (!strcmp(name, "document")) {
307 ctxt->myDoc = *((xmlDocPtr *) value);
308 } else if (!strcmp(name, "is well formed")) {
309 ctxt->wellFormed = *((int *) value);
310 } else if (!strcmp(name, "is valid")) {
311 ctxt->valid = *((int *) value);
312 } else if (!strcmp(name, "SAX block")) {
313 ctxt->sax = *((xmlSAXHandlerPtr *) value);
314 } else if (!strcmp(name, "SAX function internalSubset")) {
315 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function isStandalone")) {
317 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
319 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
321 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function resolveEntity")) {
323 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
324 } else if (!strcmp(name, "SAX function getEntity")) {
325 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function entityDecl")) {
327 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function notationDecl")) {
329 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function attributeDecl")) {
331 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function elementDecl")) {
333 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
335 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
337 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function startDocument")) {
339 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function endDocument")) {
341 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startElement")) {
343 ctxt->sax->startElement = *((startElementSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endElement")) {
345 ctxt->sax->endElement = *((endElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function reference")) {
347 ctxt->sax->reference = *((referenceSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function characters")) {
349 ctxt->sax->characters = *((charactersSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
351 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function processingInstruction")) {
353 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function comment")) {
355 ctxt->sax->comment = *((commentSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function warning")) {
357 ctxt->sax->warning = *((warningSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function error")) {
359 ctxt->sax->error = *((errorSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function fatalError")) {
361 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function getParameterEntity")) {
363 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
364 } else if (!strcmp(name, "SAX function cdataBlock")) {
365 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function externalSubset")) {
367 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
368 } else {
369 return(-1);
370 }
371 return(0);
372}
373
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
379
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are allowed */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* 0xD800 - 0xDFFF is the surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF are excluded */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
400
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* horizontal tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
415
/*
 * BaseChar membership for the code points 0x0000 - 0x00FF, indexed
 * directly by the code point.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * BaseChar ranges for code points 0x0100 and above, as inclusive
 * { first, last } pairs. The table MUST stay sorted in ascending order
 * with non-overlapping entries: xmlIsBaseChar() binary-searches it.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3},
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; the others
 * are looked up in the sorted xmlBaseCharRanges table. Negative values
 * are not characters and yield 0 (they must never be used as an index
 * into xmlBaseArray).
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int first, last;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    first = 0;
    last = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (first <= last) {
        int mid = first + (last - first) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            last = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            first = mid + 1;
        else
            return(1);
    }
    return(0);
}
654
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive { first, last } digit ranges, in ascending order */
    static const int digitRanges[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29},
    };
    const int count = (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int i;

    for (i = 0; i < count; i++) {
        /* table is sorted: once c is below a range it cannot match */
        if (c < digitRanges[i][0])
            return(0);
        if (c <= digitRanges[i][1])
            return(1);
    }
    return(0);
}
684
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * Inclusive { first, last } ranges. The table must stay sorted in
     * ascending order without overlap: it is binary-searched below.
     */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C},
        {0x093E, 0x094C}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4},
        {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
        {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C},
        {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5},
        {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
        {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
        {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03},
        {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},
        {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
        {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
        {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97},
        {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F},
        {0x3099, 0x3099}, {0x309A, 0x309A},
    };
    int first = 0;
    int last = (int) (sizeof(combiningRanges) /
                      sizeof(combiningRanges[0])) - 1;

    /* nothing below the first range can match */
    if (c < 0x0300)
        return(0);

    while (first <= last) {
        int mid = first + (last - first) / 2;

        if (c < combiningRanges[mid][0])
            last = mid - 1;
        else if (c > combiningRanges[mid][1])
            first = mid + 1;
        else
            return(1);
    }
    return(0);
}
797
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the two multi-value ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* the remaining isolated code points */
    return((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
           (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
           (c == 0x0EC6) || (c == 0x3005) ||
           (c == 0x309D) || (c == 0x309E));
}
822
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; in
 * particular the range [#xF900-#xFA2D] is not part of the production
 * and is rejected.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast rejection: no ideographic character lives below #x0100 */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
840
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The test is delegated to the IS_BASECHAR and IS_IDEOGRAPHIC macros.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
854
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')))
        return(1);

    /* the blanks and the punctuation listed by the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
877
878/************************************************************************
879 * *
880 * Input handling functions for progressive parsing *
881 * *
882 ************************************************************************/
883
884/* #define DEBUG_INPUT */
885/* #define DEBUG_STACK */
886/* #define DEBUG_PUSH */
887
888
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError when the base pointer of @in does not
 * match the content of its buffer, or when the current pointer lies
 * before base or after base + use, then dumps the buffer state.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (a cast to int truncates them where
     * pointers are wider than int) and every integer argument is cast
     * to the exact type its conversion specifier expects.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
917
918
919/**
920 * xmlParserInputRead:
921 * @in: an XML parser input
922 * @len: an indicative size for the lookahead
923 *
924 * This function refresh the input for the parser. It doesn't try to
925 * preserve pointers to the input buffer, and discard already read data
926 *
927 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
928 * end of this entity
929 */
930int
931xmlParserInputRead(xmlParserInputPtr in, int len) {
932 int ret;
933 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000934 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000935
936#ifdef DEBUG_INPUT
937 xmlGenericError(xmlGenericErrorContext, "Read\n");
938#endif
939 if (in->buf == NULL) return(-1);
940 if (in->base == NULL) return(-1);
941 if (in->cur == NULL) return(-1);
942 if (in->buf->buffer == NULL) return(-1);
943 if (in->buf->readcallback == NULL) return(-1);
944
945 CHECK_BUFFER(in);
946
947 used = in->cur - in->buf->buffer->content;
948 ret = xmlBufferShrink(in->buf->buffer, used);
949 if (ret > 0) {
950 in->cur -= ret;
951 in->consumed += ret;
952 }
953 ret = xmlParserInputBufferRead(in->buf, len);
954 if (in->base != in->buf->buffer->content) {
955 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000956 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000957 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000959 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000960 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000961 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000962 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000963
964 CHECK_BUFFER(in);
965
966 return(ret);
967}
968
969/**
970 * xmlParserInputGrow:
971 * @in: an XML parser input
972 * @len: an indicative size for the lookahead
973 *
974 * This function increase the input for the parser. It tries to
975 * preserve pointers to the input buffer, and keep already read data
976 *
977 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
978 * end of this entity
979 */
980int
981xmlParserInputGrow(xmlParserInputPtr in, int len) {
982 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000983 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000984
985#ifdef DEBUG_INPUT
986 xmlGenericError(xmlGenericErrorContext, "Grow\n");
987#endif
988 if (in->buf == NULL) return(-1);
989 if (in->base == NULL) return(-1);
990 if (in->cur == NULL) return(-1);
991 if (in->buf->buffer == NULL) return(-1);
992
993 CHECK_BUFFER(in);
994
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000995 indx = in->cur - in->base;
996 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000997
998 CHECK_BUFFER(in);
999
1000 return(0);
1001 }
1002 if (in->buf->readcallback != NULL)
1003 ret = xmlParserInputBufferGrow(in->buf, len);
1004 else
1005 return(0);
1006
1007 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001008 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001009 * block, but we use it really as an integer to do some
1010 * pointer arithmetic. Insure will raise it as a bug but in
1011 * that specific case, that's not !
1012 */
1013 if (in->base != in->buf->buffer->content) {
1014 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001015 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001018 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001019 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001020 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001021 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001022
1023 CHECK_BUFFER(in);
1024
1025 return(ret);
1026}
1027
1028/**
1029 * xmlParserInputShrink:
1030 * @in: an XML parser input
1031 *
1032 * This function removes used input for the parser.
1033 */
1034void
1035xmlParserInputShrink(xmlParserInputPtr in) {
1036 int used;
1037 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001038 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001039
1040#ifdef DEBUG_INPUT
1041 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1042#endif
1043 if (in->buf == NULL) return;
1044 if (in->base == NULL) return;
1045 if (in->cur == NULL) return;
1046 if (in->buf->buffer == NULL) return;
1047
1048 CHECK_BUFFER(in);
1049
1050 used = in->cur - in->buf->buffer->content;
1051 /*
1052 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001053 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001054 */
Owen Taylor3473f882001-02-23 17:55:21 +00001055 if (used > INPUT_CHUNK) {
1056 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1057 if (ret > 0) {
1058 in->cur -= ret;
1059 in->consumed += ret;
1060 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001061 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001062 }
1063
1064 CHECK_BUFFER(in);
1065
1066 if (in->buf->buffer->use > INPUT_CHUNK) {
1067 return;
1068 }
1069 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1070 if (in->base != in->buf->buffer->content) {
1071 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001072 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001073 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001074 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001075 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001076 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001077 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001078 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001079
1080 CHECK_BUFFER(in);
1081}
1082
1083/************************************************************************
1084 * *
1085 * UTF8 character input and related functions *
1086 * *
1087 ************************************************************************/
1088
1089/**
1090 * xmlNextChar:
1091 * @ctxt: the XML parser context
1092 *
1093 * Skip to the next char input char.
1094 */
1095
1096void
Daniel Veillard77a90a72003-03-22 00:04:05 +00001097xmlNextChar(xmlParserCtxtPtr ctxt)
1098{
Owen Taylor3473f882001-02-23 17:55:21 +00001099 if (ctxt->instate == XML_PARSER_EOF)
Daniel Veillard77a90a72003-03-22 00:04:05 +00001100 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001101
Daniel Veillardfdc91562002-07-01 21:52:03 +00001102 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001103 if ((*ctxt->input->cur == 0) &&
1104 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1105 (ctxt->instate != XML_PARSER_COMMENT)) {
1106 /*
1107 * If we are at the end of the current entity and
1108 * the context allows it, we pop consumed entities
1109 * automatically.
1110 * the auto closing should be blocked in other cases
1111 */
1112 xmlPopInput(ctxt);
1113 } else {
1114 const unsigned char *cur;
1115 unsigned char c;
Owen Taylor3473f882001-02-23 17:55:21 +00001116
Daniel Veillard77a90a72003-03-22 00:04:05 +00001117 /*
1118 * 2.11 End-of-Line Handling
1119 * the literal two-character sequence "#xD#xA" or a standalone
1120 * literal #xD, an XML processor must pass to the application
1121 * the single character #xA.
1122 */
1123 if (*(ctxt->input->cur) == '\n') {
1124 ctxt->input->line++;
1125 ctxt->input->col = 1;
1126 } else
1127 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001128
Daniel Veillard77a90a72003-03-22 00:04:05 +00001129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 cur = ctxt->input->cur;
1141
1142 c = *cur;
1143 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001144 if (c == 0xC0)
1145 goto encoding_error;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001146 if (cur[1] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[1] & 0xc0) != 0x80)
1149 goto encoding_error;
1150 if ((c & 0xe0) == 0xe0) {
1151 unsigned int val;
1152
1153 if (cur[2] == 0)
1154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1155 if ((cur[2] & 0xc0) != 0x80)
1156 goto encoding_error;
1157 if ((c & 0xf0) == 0xf0) {
1158 if (cur[3] == 0)
1159 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1160 if (((c & 0xf8) != 0xf0) ||
1161 ((cur[3] & 0xc0) != 0x80))
1162 goto encoding_error;
1163 /* 4-byte code */
1164 ctxt->input->cur += 4;
1165 val = (cur[0] & 0x7) << 18;
1166 val |= (cur[1] & 0x3f) << 12;
1167 val |= (cur[2] & 0x3f) << 6;
1168 val |= cur[3] & 0x3f;
1169 } else {
1170 /* 3-byte code */
1171 ctxt->input->cur += 3;
1172 val = (cur[0] & 0xf) << 12;
1173 val |= (cur[1] & 0x3f) << 6;
1174 val |= cur[2] & 0x3f;
1175 }
1176 if (((val > 0xd7ff) && (val < 0xe000)) ||
1177 ((val > 0xfffd) && (val < 0x10000)) ||
1178 (val >= 0x110000)) {
1179 if ((ctxt->sax != NULL) &&
1180 (ctxt->sax->error != NULL))
1181 ctxt->sax->error(ctxt->userData,
1182 "Char 0x%X out of allowed range\n",
1183 val);
1184 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1185 ctxt->wellFormed = 0;
1186 if (ctxt->recovery == 0)
1187 ctxt->disableSAX = 1;
1188 }
1189 } else
1190 /* 2-byte code */
1191 ctxt->input->cur += 2;
1192 } else
1193 /* 1-byte code */
1194 ctxt->input->cur++;
1195
1196 ctxt->nbChars++;
1197 if (*ctxt->input->cur == 0)
1198 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1199 }
Owen Taylor3473f882001-02-23 17:55:21 +00001200 } else {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001201 /*
1202 * Assume it's a fixed length encoding (1) with
1203 * a compatible encoding for the ASCII set, since
1204 * XML constructs only use < 128 chars
1205 */
1206
1207 if (*(ctxt->input->cur) == '\n') {
1208 ctxt->input->line++;
1209 ctxt->input->col = 1;
1210 } else
1211 ctxt->input->col++;
1212 ctxt->input->cur++;
1213 ctxt->nbChars++;
1214 if (*ctxt->input->cur == 0)
1215 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001216 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001217 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001218 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001219 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001220 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001221 xmlPopInput(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001222 return;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001223 encoding_error:
Owen Taylor3473f882001-02-23 17:55:21 +00001224 /*
1225 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001226 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001227 * declaration header. Report the error and switch the encoding
1228 * to ISO-Latin-1 (if you don't like this policy, just declare the
1229 * encoding !)
1230 */
1231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001232 ctxt->sax->error(ctxt->userData,
1233 "Input is not proper UTF-8, indicate encoding !\n");
1234 ctxt->sax->error(ctxt->userData,
1235 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1236 ctxt->input->cur[0], ctxt->input->cur[1],
1237 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001238 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001239 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001240 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1241
Daniel Veillard77a90a72003-03-22 00:04:05 +00001242 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001243 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001244 return;
1245}
1246
1247/**
1248 * xmlCurrentChar:
1249 * @ctxt: the XML parser context
1250 * @len: pointer to the length of the char read
1251 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001252 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001253 * bytes in the input buffer. Implement the end of line normalization:
1254 * 2.11 End-of-Line Handling
1255 * Wherever an external parsed entity or the literal entity value
1256 * of an internal parsed entity contains either the literal two-character
1257 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1258 * must pass to the application the single character #xA.
1259 * This behavior can conveniently be produced by normalizing all
1260 * line breaks to #xA on input, before parsing.)
1261 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001262 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001263 */
1264
1265int
1266xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1267 if (ctxt->instate == XML_PARSER_EOF)
1268 return(0);
1269
Daniel Veillard561b7f82002-03-20 21:55:57 +00001270 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1271 *len = 1;
1272 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001273 }
1274 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1275 /*
1276 * We are supposed to handle UTF8, check it's valid
1277 * From rfc2044: encoding of the Unicode values on UTF-8:
1278 *
1279 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1280 * 0000 0000-0000 007F 0xxxxxxx
1281 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1282 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1283 *
1284 * Check for the 0x110000 limit too
1285 */
1286 const unsigned char *cur = ctxt->input->cur;
1287 unsigned char c;
1288 unsigned int val;
1289
1290 c = *cur;
1291 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001292 if (c == 0xC0)
1293 goto encoding_error;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001294 if (cur[1] == 0)
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001297 goto encoding_error;
1298 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001299
1300 if (cur[2] == 0)
1301 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1302 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001303 goto encoding_error;
1304 if ((c & 0xf0) == 0xf0) {
1305 if (cur[3] == 0)
1306 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001307 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001308 ((cur[3] & 0xc0) != 0x80))
1309 goto encoding_error;
1310 /* 4-byte code */
1311 *len = 4;
1312 val = (cur[0] & 0x7) << 18;
1313 val |= (cur[1] & 0x3f) << 12;
1314 val |= (cur[2] & 0x3f) << 6;
1315 val |= cur[3] & 0x3f;
1316 } else {
1317 /* 3-byte code */
1318 *len = 3;
1319 val = (cur[0] & 0xf) << 12;
1320 val |= (cur[1] & 0x3f) << 6;
1321 val |= cur[2] & 0x3f;
1322 }
1323 } else {
1324 /* 2-byte code */
1325 *len = 2;
1326 val = (cur[0] & 0x1f) << 6;
1327 val |= cur[1] & 0x3f;
1328 }
1329 if (!IS_CHAR(val)) {
1330 if ((ctxt->sax != NULL) &&
1331 (ctxt->sax->error != NULL))
1332 ctxt->sax->error(ctxt->userData,
1333 "Char 0x%X out of allowed range\n", val);
1334 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1335 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001336 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001337 }
1338 return(val);
1339 } else {
1340 /* 1-byte code */
1341 *len = 1;
1342 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001343 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001344 ctxt->nbChars++;
1345 ctxt->input->cur++;
1346 }
1347 return(0xA);
1348 }
1349 return((int) *ctxt->input->cur);
1350 }
1351 }
1352 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001353 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001354 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001355 * XML constructs only use < 128 chars
1356 */
1357 *len = 1;
1358 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001359 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001360 ctxt->nbChars++;
1361 ctxt->input->cur++;
1362 }
1363 return(0xA);
1364 }
1365 return((int) *ctxt->input->cur);
1366encoding_error:
1367 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001368 * An encoding problem may arise from a truncated input buffer
1369 * splitting a character in the middle. In that case do not raise
1370 * an error but return 0 to endicate an end of stream problem
1371 */
1372 if (ctxt->input->end - ctxt->input->cur < 4) {
1373 *len = 0;
1374 return(0);
1375 }
1376
1377 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001378 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001379 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001380 * declaration header. Report the error and switch the encoding
1381 * to ISO-Latin-1 (if you don't like this policy, just declare the
1382 * encoding !)
1383 */
1384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1385 ctxt->sax->error(ctxt->userData,
1386 "Input is not proper UTF-8, indicate encoding !\n");
1387 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001388 ctxt->input->cur[0], ctxt->input->cur[1],
1389 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001390 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001391 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001392 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1393
1394 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1395 *len = 1;
1396 return((int) *ctxt->input->cur);
1397}
1398
1399/**
1400 * xmlStringCurrentChar:
1401 * @ctxt: the XML parser context
1402 * @cur: pointer to the beginning of the char
1403 * @len: pointer to the length of the char read
1404 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001405 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001406 * bytes in the input buffer.
1407 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001408 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001409 */
1410
1411int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001412xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1413{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001414 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001415 /*
1416 * We are supposed to handle UTF8, check it's valid
1417 * From rfc2044: encoding of the Unicode values on UTF-8:
1418 *
1419 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1420 * 0000 0000-0000 007F 0xxxxxxx
1421 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1422 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1423 *
1424 * Check for the 0x110000 limit too
1425 */
1426 unsigned char c;
1427 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillardd8224e02002-01-13 15:43:22 +00001429 c = *cur;
1430 if (c & 0x80) {
1431 if ((cur[1] & 0xc0) != 0x80)
1432 goto encoding_error;
1433 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001434
Daniel Veillardd8224e02002-01-13 15:43:22 +00001435 if ((cur[2] & 0xc0) != 0x80)
1436 goto encoding_error;
1437 if ((c & 0xf0) == 0xf0) {
1438 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1439 goto encoding_error;
1440 /* 4-byte code */
1441 *len = 4;
1442 val = (cur[0] & 0x7) << 18;
1443 val |= (cur[1] & 0x3f) << 12;
1444 val |= (cur[2] & 0x3f) << 6;
1445 val |= cur[3] & 0x3f;
1446 } else {
1447 /* 3-byte code */
1448 *len = 3;
1449 val = (cur[0] & 0xf) << 12;
1450 val |= (cur[1] & 0x3f) << 6;
1451 val |= cur[2] & 0x3f;
1452 }
1453 } else {
1454 /* 2-byte code */
1455 *len = 2;
1456 val = (cur[0] & 0x1f) << 6;
1457 val |= cur[1] & 0x3f;
1458 }
1459 if (!IS_CHAR(val)) {
1460 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1461 (ctxt->sax->error != NULL))
1462 ctxt->sax->error(ctxt->userData,
1463 "Char 0x%X out of allowed range\n",
1464 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001465 if (ctxt != NULL) {
1466 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1467 ctxt->wellFormed = 0;
1468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1469 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001470 }
1471 return (val);
1472 } else {
1473 /* 1-byte code */
1474 *len = 1;
1475 return ((int) *cur);
1476 }
Owen Taylor3473f882001-02-23 17:55:21 +00001477 }
1478 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001479 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001480 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001481 * XML constructs only use < 128 chars
1482 */
1483 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001484 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001485encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001486
Owen Taylor3473f882001-02-23 17:55:21 +00001487 /*
1488 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001489 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001490 * declaration header. Report the error and switch the encoding
1491 * to ISO-Latin-1 (if you don't like this policy, just declare the
1492 * encoding !)
1493 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001494 if (ctxt != NULL) {
1495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1496 ctxt->sax->error(ctxt->userData,
1497 "Input is not proper UTF-8, indicate encoding !\n");
1498 ctxt->sax->error(ctxt->userData,
1499 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1500 ctxt->input->cur[0], ctxt->input->cur[1],
1501 ctxt->input->cur[2], ctxt->input->cur[3]);
1502 }
1503 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001504 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001505 }
Owen Taylor3473f882001-02-23 17:55:21 +00001506
1507 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001508 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001509}
1510
1511/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001512 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001513 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001514 * @val: the char value
1515 *
1516 * append the char value in the array
1517 *
1518 * Returns the number of xmlChar written
1519 */
Owen Taylor3473f882001-02-23 17:55:21 +00001520int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001521xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001522 /*
1523 * We are supposed to handle UTF8, check it's valid
1524 * From rfc2044: encoding of the Unicode values on UTF-8:
1525 *
1526 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1527 * 0000 0000-0000 007F 0xxxxxxx
1528 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1529 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1530 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001531 if (val >= 0x80) {
1532 xmlChar *savedout = out;
1533 int bits;
1534 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1535 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1536 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1537 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001538 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001539 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001540 val);
1541 return(0);
1542 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001543 for ( ; bits >= 0; bits-= 6)
1544 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1545 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001546 }
1547 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001548 return 1;
1549}
1550
1551/**
1552 * xmlCopyChar:
1553 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001554 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001555 * @val: the char value
1556 *
1557 * append the char value in the array
1558 *
1559 * Returns the number of xmlChar written
1560 */
1561
1562int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001563xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001564 /* the len parameter is ignored */
1565 if (val >= 0x80) {
1566 return(xmlCopyCharMultiByte (out, val));
1567 }
1568 *out = (xmlChar) val;
1569 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001570}
1571
1572/************************************************************************
1573 * *
1574 * Commodity functions to switch encodings *
1575 * *
1576 ************************************************************************/
1577
1578/**
1579 * xmlSwitchEncoding:
1580 * @ctxt: the parser context
1581 * @enc: the encoding value (number)
1582 *
1583 * change the input functions when discovering the character encoding
1584 * of a given entity.
1585 *
1586 * Returns 0 in case of success, -1 otherwise
1587 */
1588int
1589xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1590{
1591 xmlCharEncodingHandlerPtr handler;
1592
1593 switch (enc) {
1594 case XML_CHAR_ENCODING_ERROR:
1595 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1597 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1598 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001599 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001600 break;
1601 case XML_CHAR_ENCODING_NONE:
1602 /* let's assume it's UTF-8 without the XML decl */
1603 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1604 return(0);
1605 case XML_CHAR_ENCODING_UTF8:
1606 /* default encoding, no conversion should be needed */
1607 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001608
1609 /*
1610 * Errata on XML-1.0 June 20 2001
1611 * Specific handling of the Byte Order Mark for
1612 * UTF-8
1613 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001614 if ((ctxt->input != NULL) &&
1615 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001616 (ctxt->input->cur[1] == 0xBB) &&
1617 (ctxt->input->cur[2] == 0xBF)) {
1618 ctxt->input->cur += 3;
1619 }
Owen Taylor3473f882001-02-23 17:55:21 +00001620 return(0);
Daniel Veillard2dcb9372003-07-16 21:18:19 +00001621 case XML_CHAR_ENCODING_UTF16LE:
1622 case XML_CHAR_ENCODING_UTF16BE:
1623 /*The raw input characters are encoded
1624 *in UTF-16. As we expect this function
1625 *to be called after xmlCharEncInFunc, we expect
1626 *ctxt->input->cur to contain UTF-8 encoded characters.
1627 *So the raw UTF16 Byte Order Mark
1628 *has also been converted into
1629 *an UTF-8 BOM. Let's skip that BOM.
1630 */
1631 if ((ctxt->input != NULL) &&
1632 (ctxt->input->cur[0] == 0xEF) &&
1633 (ctxt->input->cur[1] == 0xBB) &&
1634 (ctxt->input->cur[2] == 0xBF)) {
1635 ctxt->input->cur += 3;
1636 }
1637 break ;
Owen Taylor3473f882001-02-23 17:55:21 +00001638 default:
1639 break;
1640 }
1641 handler = xmlGetCharEncodingHandler(enc);
1642 if (handler == NULL) {
1643 /*
1644 * Default handlers.
1645 */
1646 switch (enc) {
1647 case XML_CHAR_ENCODING_ERROR:
1648 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1650 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001653 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1654 break;
1655 case XML_CHAR_ENCODING_NONE:
1656 /* let's assume it's UTF-8 without the XML decl */
1657 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1658 return(0);
1659 case XML_CHAR_ENCODING_UTF8:
1660 case XML_CHAR_ENCODING_ASCII:
1661 /* default encoding, no conversion should be needed */
1662 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1663 return(0);
1664 case XML_CHAR_ENCODING_UTF16LE:
1665 break;
1666 case XML_CHAR_ENCODING_UTF16BE:
1667 break;
1668 case XML_CHAR_ENCODING_UCS4LE:
1669 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1671 ctxt->sax->error(ctxt->userData,
1672 "char encoding USC4 little endian not supported\n");
1673 break;
1674 case XML_CHAR_ENCODING_UCS4BE:
1675 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1677 ctxt->sax->error(ctxt->userData,
1678 "char encoding USC4 big endian not supported\n");
1679 break;
1680 case XML_CHAR_ENCODING_EBCDIC:
1681 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1683 ctxt->sax->error(ctxt->userData,
1684 "char encoding EBCDIC not supported\n");
1685 break;
1686 case XML_CHAR_ENCODING_UCS4_2143:
1687 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1689 ctxt->sax->error(ctxt->userData,
1690 "char encoding UCS4 2143 not supported\n");
1691 break;
1692 case XML_CHAR_ENCODING_UCS4_3412:
1693 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1695 ctxt->sax->error(ctxt->userData,
1696 "char encoding UCS4 3412 not supported\n");
1697 break;
1698 case XML_CHAR_ENCODING_UCS2:
1699 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1701 ctxt->sax->error(ctxt->userData,
1702 "char encoding UCS2 not supported\n");
1703 break;
1704 case XML_CHAR_ENCODING_8859_1:
1705 case XML_CHAR_ENCODING_8859_2:
1706 case XML_CHAR_ENCODING_8859_3:
1707 case XML_CHAR_ENCODING_8859_4:
1708 case XML_CHAR_ENCODING_8859_5:
1709 case XML_CHAR_ENCODING_8859_6:
1710 case XML_CHAR_ENCODING_8859_7:
1711 case XML_CHAR_ENCODING_8859_8:
1712 case XML_CHAR_ENCODING_8859_9:
1713 /*
1714 * We used to keep the internal content in the
1715 * document encoding however this turns being unmaintainable
1716 * So xmlGetCharEncodingHandler() will return non-null
1717 * values for this now.
1718 */
1719 if ((ctxt->inputNr == 1) &&
1720 (ctxt->encoding == NULL) &&
1721 (ctxt->input->encoding != NULL)) {
1722 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1723 }
1724 ctxt->charset = enc;
1725 return(0);
1726 case XML_CHAR_ENCODING_2022_JP:
1727 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1729 ctxt->sax->error(ctxt->userData,
1730 "char encoding ISO-2022-JPnot supported\n");
1731 break;
1732 case XML_CHAR_ENCODING_SHIFT_JIS:
1733 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "char encoding Shift_JIS not supported\n");
1737 break;
1738 case XML_CHAR_ENCODING_EUC_JP:
1739 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741 ctxt->sax->error(ctxt->userData,
1742 "char encoding EUC-JPnot supported\n");
1743 break;
1744 }
1745 }
1746 if (handler == NULL)
1747 return(-1);
1748 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1749 return(xmlSwitchToEncoding(ctxt, handler));
1750}
1751
1752/**
1753 * xmlSwitchToEncoding:
1754 * @ctxt: the parser context
1755 * @handler: the encoding handler
1756 *
1757 * change the input functions when discovering the character encoding
1758 * of a given entity.
1759 *
1760 * Returns 0 in case of success, -1 otherwise
1761 */
1762int
1763xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1764{
1765 int nbchars;
1766
1767 if (handler != NULL) {
1768 if (ctxt->input != NULL) {
1769 if (ctxt->input->buf != NULL) {
1770 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001771 /*
1772 * Check in case the auto encoding detetection triggered
1773 * in already.
1774 */
Owen Taylor3473f882001-02-23 17:55:21 +00001775 if (ctxt->input->buf->encoder == handler)
1776 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001777
1778 /*
1779 * "UTF-16" can be used for both LE and BE
Daniel Veillard878eab02002-02-19 13:46:09 +00001780 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1781 BAD_CAST "UTF-16", 6)) &&
1782 (!xmlStrncmp(BAD_CAST handler->name,
1783 BAD_CAST "UTF-16", 6))) {
1784 return(0);
1785 }
Daniel Veillarda6874ca2003-07-29 16:47:24 +00001786 */
Daniel Veillard878eab02002-02-19 13:46:09 +00001787
Owen Taylor3473f882001-02-23 17:55:21 +00001788 /*
1789 * Note: this is a bit dangerous, but that's what it
1790 * takes to use nearly compatible signature for different
1791 * encodings.
1792 */
1793 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1794 ctxt->input->buf->encoder = handler;
1795 return(0);
1796 }
1797 ctxt->input->buf->encoder = handler;
1798
1799 /*
1800 * Is there already some content down the pipe to convert ?
1801 */
1802 if ((ctxt->input->buf->buffer != NULL) &&
1803 (ctxt->input->buf->buffer->use > 0)) {
1804 int processed;
1805
1806 /*
1807 * Specific handling of the Byte Order Mark for
1808 * UTF-16
1809 */
1810 if ((handler->name != NULL) &&
1811 (!strcmp(handler->name, "UTF-16LE")) &&
1812 (ctxt->input->cur[0] == 0xFF) &&
1813 (ctxt->input->cur[1] == 0xFE)) {
1814 ctxt->input->cur += 2;
1815 }
1816 if ((handler->name != NULL) &&
1817 (!strcmp(handler->name, "UTF-16BE")) &&
1818 (ctxt->input->cur[0] == 0xFE) &&
1819 (ctxt->input->cur[1] == 0xFF)) {
1820 ctxt->input->cur += 2;
1821 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001822 /*
1823 * Errata on XML-1.0 June 20 2001
1824 * Specific handling of the Byte Order Mark for
1825 * UTF-8
1826 */
1827 if ((handler->name != NULL) &&
1828 (!strcmp(handler->name, "UTF-8")) &&
1829 (ctxt->input->cur[0] == 0xEF) &&
1830 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001831 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001832 ctxt->input->cur += 3;
1833 }
Owen Taylor3473f882001-02-23 17:55:21 +00001834
1835 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001836 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001837 * Move it as the raw buffer and create a new input buffer
1838 */
1839 processed = ctxt->input->cur - ctxt->input->base;
1840 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1841 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1842 ctxt->input->buf->buffer = xmlBufferCreate();
1843
1844 if (ctxt->html) {
1845 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001846 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001847 */
1848 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1849 ctxt->input->buf->buffer,
1850 ctxt->input->buf->raw);
1851 } else {
1852 /*
1853 * convert just enough to get
1854 * '<?xml version="1.0" encoding="xxx"?>'
1855 * parsed with the autodetected encoding
1856 * into the parser reading buffer.
1857 */
1858 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1859 ctxt->input->buf->buffer,
1860 ctxt->input->buf->raw);
1861 }
1862 if (nbchars < 0) {
1863 xmlGenericError(xmlGenericErrorContext,
1864 "xmlSwitchToEncoding: encoder error\n");
1865 return(-1);
1866 }
1867 ctxt->input->base =
1868 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001869 ctxt->input->end =
1870 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001871
1872 }
1873 return(0);
1874 } else {
1875 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1876 /*
1877 * When parsing a static memory array one must know the
1878 * size to be able to convert the buffer.
1879 */
1880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1881 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001882 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001883 return(-1);
1884 } else {
1885 int processed;
1886
1887 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001888 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001889 * Move it as the raw buffer and create a new input buffer
1890 */
1891 processed = ctxt->input->cur - ctxt->input->base;
1892
1893 ctxt->input->buf->raw = xmlBufferCreate();
1894 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1895 ctxt->input->length - processed);
1896 ctxt->input->buf->buffer = xmlBufferCreate();
1897
1898 /*
1899 * convert as much as possible of the raw input
1900 * to the parser reading buffer.
1901 */
1902 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1903 ctxt->input->buf->buffer,
1904 ctxt->input->buf->raw);
1905 if (nbchars < 0) {
1906 xmlGenericError(xmlGenericErrorContext,
1907 "xmlSwitchToEncoding: encoder error\n");
1908 return(-1);
1909 }
1910
1911 /*
1912 * Conversion succeeded, get rid of the old buffer
1913 */
1914 if ((ctxt->input->free != NULL) &&
1915 (ctxt->input->base != NULL))
1916 ctxt->input->free((xmlChar *) ctxt->input->base);
1917 ctxt->input->base =
1918 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001919 ctxt->input->end =
1920 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001921 }
1922 }
1923 } else {
1924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1925 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001926 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001927 return(-1);
1928 }
1929 /*
1930 * The parsing is now done in UTF8 natively
1931 */
1932 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1933 } else
1934 return(-1);
1935 return(0);
1936
1937}
1938
1939/************************************************************************
1940 * *
1941 * Commodity functions to handle entities processing *
1942 * *
1943 ************************************************************************/
1944
1945/**
1946 * xmlFreeInputStream:
1947 * @input: an xmlParserInputPtr
1948 *
1949 * Free up an input stream.
1950 */
1951void
1952xmlFreeInputStream(xmlParserInputPtr input) {
1953 if (input == NULL) return;
1954
1955 if (input->filename != NULL) xmlFree((char *) input->filename);
1956 if (input->directory != NULL) xmlFree((char *) input->directory);
1957 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1958 if (input->version != NULL) xmlFree((char *) input->version);
1959 if ((input->free != NULL) && (input->base != NULL))
1960 input->free((xmlChar *) input->base);
1961 if (input->buf != NULL)
1962 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001963 xmlFree(input);
1964}
1965
1966/**
1967 * xmlNewInputStream:
1968 * @ctxt: an XML parser context
1969 *
1970 * Create a new input stream structure
1971 * Returns the new input stream or NULL
1972 */
1973xmlParserInputPtr
1974xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1975 xmlParserInputPtr input;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001976 static int id = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001977
1978 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1979 if (input == NULL) {
1980 if (ctxt != NULL) {
1981 ctxt->errNo = XML_ERR_NO_MEMORY;
1982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1983 ctxt->sax->error(ctxt->userData,
1984 "malloc: couldn't allocate a new input stream\n");
1985 ctxt->errNo = XML_ERR_NO_MEMORY;
1986 }
1987 return(NULL);
1988 }
1989 memset(input, 0, sizeof(xmlParserInput));
1990 input->line = 1;
1991 input->col = 1;
1992 input->standalone = -1;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001993 /*
1994 * we don't care about thread reentrancy unicity for a single
1995 * parser context (and hence thread) is sufficient.
1996 */
1997 input->id = id++;
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return(input);
1999}
2000
2001/**
2002 * xmlNewIOInputStream:
2003 * @ctxt: an XML parser context
2004 * @input: an I/O Input
2005 * @enc: the charset encoding if known
2006 *
2007 * Create a new input stream structure encapsulating the @input into
2008 * a stream suitable for the parser.
2009 *
2010 * Returns the new input stream or NULL
2011 */
2012xmlParserInputPtr
2013xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
2014 xmlCharEncoding enc) {
2015 xmlParserInputPtr inputStream;
2016
2017 if (xmlParserDebugEntities)
2018 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
2019 inputStream = xmlNewInputStream(ctxt);
2020 if (inputStream == NULL) {
2021 return(NULL);
2022 }
2023 inputStream->filename = NULL;
2024 inputStream->buf = input;
2025 inputStream->base = inputStream->buf->buffer->content;
2026 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002027 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002028 if (enc != XML_CHAR_ENCODING_NONE) {
2029 xmlSwitchEncoding(ctxt, enc);
2030 }
2031
2032 return(inputStream);
2033}
2034
2035/**
2036 * xmlNewEntityInputStream:
2037 * @ctxt: an XML parser context
2038 * @entity: an Entity pointer
2039 *
2040 * Create a new input stream based on an xmlEntityPtr
2041 *
2042 * Returns the new input stream or NULL
2043 */
2044xmlParserInputPtr
2045xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2046 xmlParserInputPtr input;
2047
2048 if (entity == NULL) {
2049 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2051 ctxt->sax->error(ctxt->userData,
2052 "internal: xmlNewEntityInputStream entity = NULL\n");
2053 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2054 return(NULL);
2055 }
2056 if (xmlParserDebugEntities)
2057 xmlGenericError(xmlGenericErrorContext,
2058 "new input from entity: %s\n", entity->name);
2059 if (entity->content == NULL) {
2060 switch (entity->etype) {
2061 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2062 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2064 ctxt->sax->error(ctxt->userData,
2065 "xmlNewEntityInputStream unparsed entity !\n");
2066 break;
2067 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2068 case XML_EXTERNAL_PARAMETER_ENTITY:
2069 return(xmlLoadExternalEntity((char *) entity->URI,
2070 (char *) entity->ExternalID, ctxt));
2071 case XML_INTERNAL_GENERAL_ENTITY:
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt->userData,
2074 "Internal entity %s without content !\n", entity->name);
2075 break;
2076 case XML_INTERNAL_PARAMETER_ENTITY:
2077 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2079 ctxt->sax->error(ctxt->userData,
2080 "Internal parameter entity %s without content !\n", entity->name);
2081 break;
2082 case XML_INTERNAL_PREDEFINED_ENTITY:
2083 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2085 ctxt->sax->error(ctxt->userData,
2086 "Predefined entity %s without content !\n", entity->name);
2087 break;
2088 }
2089 return(NULL);
2090 }
2091 input = xmlNewInputStream(ctxt);
2092 if (input == NULL) {
2093 return(NULL);
2094 }
2095 input->filename = (char *) entity->URI;
2096 input->base = entity->content;
2097 input->cur = entity->content;
2098 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002099 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002100 return(input);
2101}
2102
2103/**
2104 * xmlNewStringInputStream:
2105 * @ctxt: an XML parser context
2106 * @buffer: an memory buffer
2107 *
2108 * Create a new input stream based on a memory buffer.
2109 * Returns the new input stream
2110 */
2111xmlParserInputPtr
2112xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2113 xmlParserInputPtr input;
2114
2115 if (buffer == NULL) {
2116 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2118 ctxt->sax->error(ctxt->userData,
2119 "internal: xmlNewStringInputStream string = NULL\n");
2120 return(NULL);
2121 }
2122 if (xmlParserDebugEntities)
2123 xmlGenericError(xmlGenericErrorContext,
2124 "new fixed input: %.30s\n", buffer);
2125 input = xmlNewInputStream(ctxt);
2126 if (input == NULL) {
2127 return(NULL);
2128 }
2129 input->base = buffer;
2130 input->cur = buffer;
2131 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002132 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002133 return(input);
2134}
2135
2136/**
2137 * xmlNewInputFromFile:
2138 * @ctxt: an XML parser context
2139 * @filename: the filename to use as entity
2140 *
2141 * Create a new input stream based on a file.
2142 *
2143 * Returns the new input stream or NULL in case of error
2144 */
2145xmlParserInputPtr
2146xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2147 xmlParserInputBufferPtr buf;
2148 xmlParserInputPtr inputStream;
2149 char *directory = NULL;
2150 xmlChar *URI = NULL;
2151
2152 if (xmlParserDebugEntities)
2153 xmlGenericError(xmlGenericErrorContext,
2154 "new input from file: %s\n", filename);
2155 if (ctxt == NULL) return(NULL);
2156 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2157 if (buf == NULL)
2158 return(NULL);
2159
2160 URI = xmlStrdup((xmlChar *) filename);
2161 directory = xmlParserGetDirectory((const char *) URI);
2162
2163 inputStream = xmlNewInputStream(ctxt);
2164 if (inputStream == NULL) {
2165 if (directory != NULL) xmlFree((char *) directory);
2166 if (URI != NULL) xmlFree((char *) URI);
2167 return(NULL);
2168 }
2169
Daniel Veillard8d8bf2c2003-09-17 19:36:25 +00002170 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
Daniel Veillarda66b1d12003-09-17 20:54:38 +00002171 if (URI != NULL) xmlFree((char *) URI);
Owen Taylor3473f882001-02-23 17:55:21 +00002172 inputStream->directory = directory;
2173 inputStream->buf = buf;
2174
2175 inputStream->base = inputStream->buf->buffer->content;
2176 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002177 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002178 if ((ctxt->directory == NULL) && (directory != NULL))
2179 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2180 return(inputStream);
2181}
2182
2183/************************************************************************
2184 * *
2185 * Commodity functions to handle parser contexts *
2186 * *
2187 ************************************************************************/
2188
2189/**
2190 * xmlInitParserCtxt:
2191 * @ctxt: an XML parser context
2192 *
2193 * Initialize a parser context
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002194 *
2195 * Returns 0 in case of success and -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00002196 */
2197
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002198int
Owen Taylor3473f882001-02-23 17:55:21 +00002199xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2200{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002201 if(ctxt==NULL) {
2202 xmlGenericError(xmlGenericErrorContext,
2203 "xmlInitParserCtxt: NULL context given\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002204 return(-1);
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002205 }
2206
Owen Taylor3473f882001-02-23 17:55:21 +00002207 xmlDefaultSAXHandlerInit();
2208
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002209 ctxt->dict = xmlDictCreate();
2210 if (ctxt->dict == NULL) {
2211 xmlGenericError(xmlGenericErrorContext,
2212 "xmlInitParserCtxt: out of memory\n");
2213 return(-1);
2214 }
William M. Brack8b2c7f12002-11-22 05:07:29 +00002215 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2216 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002217 xmlGenericError(xmlGenericErrorContext,
2218 "xmlInitParserCtxt: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002219 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002220 }
2221 else
Daniel Veillard092643b2003-09-25 14:29:29 +00002222 xmlSAXVersion(ctxt->sax, 2);
Owen Taylor3473f882001-02-23 17:55:21 +00002223
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002224 ctxt->maxatts = 0;
2225 ctxt->atts = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002226 /* Allocate the Input stack */
2227 ctxt->inputTab = (xmlParserInputPtr *)
2228 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2229 if (ctxt->inputTab == NULL) {
2230 xmlGenericError(xmlGenericErrorContext,
2231 "xmlInitParserCtxt: out of memory\n");
2232 ctxt->inputNr = 0;
2233 ctxt->inputMax = 0;
2234 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002235 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002236 }
2237 ctxt->inputNr = 0;
2238 ctxt->inputMax = 5;
2239 ctxt->input = NULL;
2240
2241 ctxt->version = NULL;
2242 ctxt->encoding = NULL;
2243 ctxt->standalone = -1;
2244 ctxt->hasExternalSubset = 0;
2245 ctxt->hasPErefs = 0;
2246 ctxt->html = 0;
2247 ctxt->external = 0;
2248 ctxt->instate = XML_PARSER_START;
2249 ctxt->token = 0;
2250 ctxt->directory = NULL;
2251
2252 /* Allocate the Node stack */
2253 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2254 if (ctxt->nodeTab == NULL) {
2255 xmlGenericError(xmlGenericErrorContext,
2256 "xmlInitParserCtxt: out of memory\n");
2257 ctxt->nodeNr = 0;
2258 ctxt->nodeMax = 0;
2259 ctxt->node = NULL;
2260 ctxt->inputNr = 0;
2261 ctxt->inputMax = 0;
2262 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002263 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002264 }
2265 ctxt->nodeNr = 0;
2266 ctxt->nodeMax = 10;
2267 ctxt->node = NULL;
2268
2269 /* Allocate the Name stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002270 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00002271 if (ctxt->nameTab == NULL) {
2272 xmlGenericError(xmlGenericErrorContext,
2273 "xmlInitParserCtxt: out of memory\n");
2274 ctxt->nodeNr = 0;
2275 ctxt->nodeMax = 0;
2276 ctxt->node = NULL;
2277 ctxt->inputNr = 0;
2278 ctxt->inputMax = 0;
2279 ctxt->input = NULL;
2280 ctxt->nameNr = 0;
2281 ctxt->nameMax = 0;
2282 ctxt->name = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002283 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002284 }
2285 ctxt->nameNr = 0;
2286 ctxt->nameMax = 10;
2287 ctxt->name = NULL;
2288
2289 /* Allocate the space stack */
2290 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2291 if (ctxt->spaceTab == NULL) {
2292 xmlGenericError(xmlGenericErrorContext,
2293 "xmlInitParserCtxt: out of memory\n");
2294 ctxt->nodeNr = 0;
2295 ctxt->nodeMax = 0;
2296 ctxt->node = NULL;
2297 ctxt->inputNr = 0;
2298 ctxt->inputMax = 0;
2299 ctxt->input = NULL;
2300 ctxt->nameNr = 0;
2301 ctxt->nameMax = 0;
2302 ctxt->name = NULL;
2303 ctxt->spaceNr = 0;
2304 ctxt->spaceMax = 0;
2305 ctxt->space = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002306 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 }
2308 ctxt->spaceNr = 1;
2309 ctxt->spaceMax = 10;
2310 ctxt->spaceTab[0] = -1;
2311 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002312 ctxt->userData = ctxt;
2313 ctxt->myDoc = NULL;
2314 ctxt->wellFormed = 1;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00002315 ctxt->nsWellFormed = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002316 ctxt->valid = 1;
2317 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2318 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2319 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002320 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002321 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002322 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002323 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002324
Owen Taylor3473f882001-02-23 17:55:21 +00002325 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002326 ctxt->vctxt.error = xmlParserValidityError;
2327 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002328 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002329 if (xmlGetWarningsDefaultValue == 0)
2330 ctxt->vctxt.warning = NULL;
2331 else
2332 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002333 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002334 }
2335 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2336 ctxt->record_info = 0;
2337 ctxt->nbChars = 0;
2338 ctxt->checkIndex = 0;
2339 ctxt->inSubset = 0;
2340 ctxt->errNo = XML_ERR_OK;
2341 ctxt->depth = 0;
2342 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002343 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002344 xmlInitNodeInfoSeq(&ctxt->node_seq);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002345 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002346}
2347
2348/**
2349 * xmlFreeParserCtxt:
2350 * @ctxt: an XML parser context
2351 *
2352 * Free all the memory used by a parser context. However the parsed
2353 * document in ctxt->myDoc is not freed.
2354 */
2355
2356void
2357xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2358{
2359 xmlParserInputPtr input;
Owen Taylor3473f882001-02-23 17:55:21 +00002360
2361 if (ctxt == NULL) return;
2362
2363 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2364 xmlFreeInputStream(input);
2365 }
Owen Taylor3473f882001-02-23 17:55:21 +00002366 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
Igor Zlatkovicd37c1392003-08-28 10:34:33 +00002367 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
Owen Taylor3473f882001-02-23 17:55:21 +00002368 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2369 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2370 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2371 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00002372 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2373 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Daniel Veillard092643b2003-09-25 14:29:29 +00002374 if ((ctxt->sax != NULL) &&
2375 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
Owen Taylor3473f882001-02-23 17:55:21 +00002376 xmlFree(ctxt->sax);
2377 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002378 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Igor Zlatkovicd37c1392003-08-28 10:34:33 +00002379 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002380 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002381 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002382 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2383 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2384 if (ctxt->attsDefault != NULL)
2385 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002386 if (ctxt->attsSpecial != NULL)
2387 xmlHashFree(ctxt->attsSpecial, NULL);
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00002388 if (ctxt->freeElems != NULL) {
2389 xmlNodePtr cur, next;
2390
2391 cur = ctxt->freeElems;
2392 while (cur != NULL) {
2393 next = cur->next;
2394 xmlFree(cur);
2395 cur = next;
2396 }
2397 }
2398 if (ctxt->freeAttrs != NULL) {
2399 xmlAttrPtr cur, next;
2400
2401 cur = ctxt->freeAttrs;
2402 while (cur != NULL) {
2403 next = cur->next;
2404 xmlFree(cur);
2405 cur = next;
2406 }
2407 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00002408
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002409#ifdef LIBXML_CATALOG_ENABLED
2410 if (ctxt->catalogs != NULL)
2411 xmlCatalogFreeLocal(ctxt->catalogs);
2412#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002413 xmlFree(ctxt);
2414}
2415
2416/**
2417 * xmlNewParserCtxt:
2418 *
2419 * Allocate and initialize a new parser context.
2420 *
2421 * Returns the xmlParserCtxtPtr or NULL
2422 */
2423
2424xmlParserCtxtPtr
2425xmlNewParserCtxt()
2426{
2427 xmlParserCtxtPtr ctxt;
2428
2429 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2430 if (ctxt == NULL) {
2431 xmlGenericError(xmlGenericErrorContext,
2432 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002433 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002434 return(NULL);
2435 }
2436 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002437 if (xmlInitParserCtxt(ctxt) < 0) {
2438 xmlFreeParserCtxt(ctxt);
2439 return(NULL);
2440 }
Owen Taylor3473f882001-02-23 17:55:21 +00002441 return(ctxt);
2442}
2443
/************************************************************************
 *									*
 *		Handling of node information				*
 *									*
 ************************************************************************/
2449
2450/**
2451 * xmlClearParserCtxt:
2452 * @ctxt: an XML parser context
2453 *
2454 * Clear (release owned resources) and reinitialize a parser context
2455 */
2456
2457void
2458xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2459{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002460 if (ctxt==NULL)
2461 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002462 xmlClearNodeInfoSeq(&ctxt->node_seq);
2463 xmlInitParserCtxt(ctxt);
2464}
2465
2466/**
2467 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002468 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002469 * @node: an XML node within the tree
2470 *
2471 * Find the parser node info struct for a given node
2472 *
2473 * Returns an xmlParserNodeInfo block pointer or NULL
2474 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002475const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2476 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002477{
2478 unsigned long pos;
2479
2480 /* Find position where node should be at */
2481 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002482 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002483 return &ctx->node_seq.buffer[pos];
2484 else
2485 return NULL;
2486}
2487
2488
2489/**
2490 * xmlInitNodeInfoSeq:
2491 * @seq: a node info sequence pointer
2492 *
2493 * -- Initialize (set to initial state) node info sequence
2494 */
2495void
2496xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2497{
2498 seq->length = 0;
2499 seq->maximum = 0;
2500 seq->buffer = NULL;
2501}
2502
2503/**
2504 * xmlClearNodeInfoSeq:
2505 * @seq: a node info sequence pointer
2506 *
2507 * -- Clear (release memory and reinitialize) node
2508 * info sequence
2509 */
2510void
2511xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2512{
2513 if ( seq->buffer != NULL )
2514 xmlFree(seq->buffer);
2515 xmlInitNodeInfoSeq(seq);
2516}
2517
2518
2519/**
2520 * xmlParserFindNodeInfoIndex:
2521 * @seq: a node info sequence pointer
2522 * @node: an XML node pointer
2523 *
2524 *
2525 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2526 * the given node is or should be at in a sorted sequence
2527 *
2528 * Returns a long indicating the position of the record
2529 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002530unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2531 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002532{
2533 unsigned long upper, lower, middle;
2534 int found = 0;
2535
2536 /* Do a binary search for the key */
2537 lower = 1;
2538 upper = seq->length;
2539 middle = 0;
2540 while ( lower <= upper && !found) {
2541 middle = lower + (upper - lower) / 2;
2542 if ( node == seq->buffer[middle - 1].node )
2543 found = 1;
2544 else if ( node < seq->buffer[middle - 1].node )
2545 upper = middle - 1;
2546 else
2547 lower = middle + 1;
2548 }
2549
2550 /* Return position */
2551 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2552 return middle;
2553 else
2554 return middle - 1;
2555}
2556
2557
2558/**
2559 * xmlParserAddNodeInfo:
2560 * @ctxt: an XML parser context
2561 * @info: a node info sequence pointer
2562 *
2563 * Insert node info record into the sorted sequence
2564 */
2565void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002566xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002567 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002568{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002569 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002570
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002571 /* Find pos and check to see if node is already in the sequence */
William M. Brack78637da2003-07-31 14:47:38 +00002572 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002573 info->node);
2574 if (pos < ctxt->node_seq.length
2575 && ctxt->node_seq.buffer[pos].node == info->node) {
2576 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002577 }
2578
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002579 /* Otherwise, we need to add new node to buffer */
2580 else {
2581 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2582 xmlParserNodeInfo *tmp_buffer;
2583 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002584
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002585 if (ctxt->node_seq.maximum == 0)
2586 ctxt->node_seq.maximum = 2;
2587 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2588 (2 * ctxt->node_seq.maximum));
2589
2590 if (ctxt->node_seq.buffer == NULL)
Daniel Veillardc4f65ab2003-04-21 23:07:45 +00002591 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002592 else
2593 tmp_buffer =
2594 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2595 byte_size);
2596
2597 if (tmp_buffer == NULL) {
2598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2599 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2600 ctxt->errNo = XML_ERR_NO_MEMORY;
2601 return;
2602 }
2603 ctxt->node_seq.buffer = tmp_buffer;
2604 ctxt->node_seq.maximum *= 2;
2605 }
2606
2607 /* If position is not at end, move elements out of the way */
2608 if (pos != ctxt->node_seq.length) {
2609 unsigned long i;
2610
2611 for (i = ctxt->node_seq.length; i > pos; i--)
2612 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2613 }
2614
2615 /* Copy element and increase length */
2616 ctxt->node_seq.buffer[pos] = *info;
2617 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 }
Owen Taylor3473f882001-02-23 17:55:21 +00002619}
2620
/************************************************************************
 *                                                                      *
 *                Default settings                                      *
 *                                                                      *
 ************************************************************************/
2626/**
2627 * xmlPedanticParserDefault:
2628 * @val: int 0 or 1
2629 *
2630 * Set and return the previous value for enabling pedantic warnings.
2631 *
2632 * Returns the last value for 0 for no substitution, 1 for substitution.
2633 */
2634
2635int
2636xmlPedanticParserDefault(int val) {
2637 int old = xmlPedanticParserDefaultValue;
2638
2639 xmlPedanticParserDefaultValue = val;
2640 return(old);
2641}
2642
2643/**
2644 * xmlLineNumbersDefault:
2645 * @val: int 0 or 1
2646 *
2647 * Set and return the previous value for enabling line numbers in elements
2648 * contents. This may break on old application and is turned off by default.
2649 *
2650 * Returns the last value for 0 for no substitution, 1 for substitution.
2651 */
2652
2653int
2654xmlLineNumbersDefault(int val) {
2655 int old = xmlLineNumbersDefaultValue;
2656
2657 xmlLineNumbersDefaultValue = val;
2658 return(old);
2659}
2660
2661/**
2662 * xmlSubstituteEntitiesDefault:
2663 * @val: int 0 or 1
2664 *
2665 * Set and return the previous value for default entity support.
2666 * Initially the parser always keep entity references instead of substituting
2667 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002668 * default parser behavior
2669 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002670 * file basis.
2671 *
2672 * Returns the last value for 0 for no substitution, 1 for substitution.
2673 */
2674
2675int
2676xmlSubstituteEntitiesDefault(int val) {
2677 int old = xmlSubstituteEntitiesDefaultValue;
2678
2679 xmlSubstituteEntitiesDefaultValue = val;
2680 return(old);
2681}
2682
2683/**
2684 * xmlKeepBlanksDefault:
2685 * @val: int 0 or 1
2686 *
2687 * Set and return the previous value for default blanks text nodes support.
2688 * The 1.x version of the parser used an heuristic to try to detect
2689 * ignorable white spaces. As a result the SAX callback was generating
2690 * ignorableWhitespace() callbacks instead of characters() one, and when
2691 * using the DOM output text nodes containing those blanks were not generated.
2692 * The 2.x and later version will switch to the XML standard way and
2693 * ignorableWhitespace() are only generated when running the parser in
2694 * validating mode and when the current element doesn't allow CDATA or
2695 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002696 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002697 * on 1.X libs and to switch back to the old mode for compatibility when
2698 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2699 * by using xmlIsBlankNode() commodity function to detect the "empty"
2700 * nodes generated.
2701 * This value also affect autogeneration of indentation when saving code
2702 * if blanks sections are kept, indentation is not generated.
2703 *
2704 * Returns the last value for 0 for no substitution, 1 for substitution.
2705 */
2706
2707int
2708xmlKeepBlanksDefault(int val) {
2709 int old = xmlKeepBlanksDefaultValue;
2710
2711 xmlKeepBlanksDefaultValue = val;
2712 xmlIndentTreeOutput = !val;
2713 return(old);
2714}
2715
2716/************************************************************************
2717 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002718 * Deprecated functions kept for compatibility *
2719 * *
2720 ************************************************************************/
2721
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002722/**
2723 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002724 * @lang: pointer to the string value
2725 *
2726 * Checks that the value conforms to the LanguageID production:
2727 *
2728 * NOTE: this is somewhat deprecated, those productions were removed from
2729 * the XML Second edition.
2730 *
2731 * [33] LanguageID ::= Langcode ('-' Subcode)*
2732 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2733 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2734 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2735 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2736 * [38] Subcode ::= ([a-z] | [A-Z])+
2737 *
2738 * Returns 1 if correct 0 otherwise
2739 **/
2740int
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002741xmlCheckLanguageID(const xmlChar * lang)
2742{
Owen Taylor3473f882001-02-23 17:55:21 +00002743 const xmlChar *cur = lang;
2744
2745 if (cur == NULL)
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002746 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002748 ((cur[0] == 'I') && (cur[1] == '-'))) {
2749 /*
2750 * IANA code
2751 */
2752 cur += 2;
2753 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2754 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2755 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00002756 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002757 ((cur[0] == 'X') && (cur[1] == '-'))) {
2758 /*
2759 * User code
2760 */
2761 cur += 2;
2762 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2763 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2764 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00002765 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002766 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2767 /*
2768 * ISO639
2769 */
2770 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00002771 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002772 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2773 cur++;
2774 else
2775 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00002776 } else
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002777 return (0);
2778 while (cur[0] != 0) { /* non input consuming */
2779 if (cur[0] != '-')
2780 return (0);
2781 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00002782 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002783 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2784 cur++;
2785 else
2786 return (0);
2787 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2788 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2789 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00002790 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002791 return (1);
Owen Taylor3473f882001-02-23 17:55:21 +00002792}
2793
2794/**
2795 * xmlDecodeEntities:
2796 * @ctxt: the parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002797 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002798 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
Owen Taylor3473f882001-02-23 17:55:21 +00002799 * @end: an end marker xmlChar, 0 if none
2800 * @end2: an end marker xmlChar, 0 if none
2801 * @end3: an end marker xmlChar, 0 if none
2802 *
2803 * This function is deprecated, we now always process entities content
2804 * through xmlStringDecodeEntities
2805 *
2806 * TODO: remove it in next major release.
2807 *
2808 * [67] Reference ::= EntityRef | CharRef
2809 *
2810 * [69] PEReference ::= '%' Name ';'
2811 *
2812 * Returns A newly allocated string with the substitution done. The caller
2813 * must deallocate it !
2814 */
2815xmlChar *
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002816xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
2817 int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2818 xmlChar end ATTRIBUTE_UNUSED,
2819 xmlChar end2 ATTRIBUTE_UNUSED,
2820 xmlChar end3 ATTRIBUTE_UNUSED)
2821{
Owen Taylor3473f882001-02-23 17:55:21 +00002822 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002823
Owen Taylor3473f882001-02-23 17:55:21 +00002824 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002825 xmlGenericError(xmlGenericErrorContext,
2826 "xmlDecodeEntities() deprecated function reached\n");
2827 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002828 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002829 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002830}
2831
2832/**
2833 * xmlNamespaceParseNCName:
2834 * @ctxt: an XML parser context
2835 *
2836 * parse an XML namespace name.
2837 *
2838 * TODO: this seems not in use anymore, the namespace handling is done on
2839 * top of the SAX interfaces, i.e. not on raw input.
2840 *
2841 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2842 *
2843 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2844 * CombiningChar | Extender
2845 *
2846 * Returns the namespace name or NULL
2847 */
2848
2849xmlChar *
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002850xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)
2851{
Owen Taylor3473f882001-02-23 17:55:21 +00002852 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002853
Owen Taylor3473f882001-02-23 17:55:21 +00002854 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002855 xmlGenericError(xmlGenericErrorContext,
2856 "xmlNamespaceParseNCName() deprecated function reached\n");
2857 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002858 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002859 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002860}
2861
2862/**
2863 * xmlNamespaceParseQName:
2864 * @ctxt: an XML parser context
2865 * @prefix: a xmlChar **
2866 *
2867 * TODO: this seems not in use anymore, the namespace handling is done on
2868 * top of the SAX interfaces, i.e. not on raw input.
2869 *
2870 * parse an XML qualified name
2871 *
2872 * [NS 5] QName ::= (Prefix ':')? LocalPart
2873 *
2874 * [NS 6] Prefix ::= NCName
2875 *
2876 * [NS 7] LocalPart ::= NCName
2877 *
2878 * Returns the local part, and prefix is updated
2879 * to get the Prefix if any.
2880 */
2881
2882xmlChar *
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002883xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
2884 xmlChar ** prefix ATTRIBUTE_UNUSED)
2885{
Owen Taylor3473f882001-02-23 17:55:21 +00002886
2887 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002888
Owen Taylor3473f882001-02-23 17:55:21 +00002889 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002890 xmlGenericError(xmlGenericErrorContext,
2891 "xmlNamespaceParseQName() deprecated function reached\n");
2892 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002893 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002894 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002895}
2896
2897/**
2898 * xmlNamespaceParseNSDef:
2899 * @ctxt: an XML parser context
2900 *
2901 * parse a namespace prefix declaration
2902 *
2903 * TODO: this seems not in use anymore, the namespace handling is done on
2904 * top of the SAX interfaces, i.e. not on raw input.
2905 *
2906 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2907 *
2908 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2909 *
2910 * Returns the namespace name
2911 */
2912
2913xmlChar *
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002914xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)
2915{
Owen Taylor3473f882001-02-23 17:55:21 +00002916 static int deprecated = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002917
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002918 if (!deprecated) {
2919 xmlGenericError(xmlGenericErrorContext,
2920 "xmlNamespaceParseNSDef() deprecated function reached\n");
2921 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002923 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002924}
2925
2926/**
2927 * xmlParseQuotedString:
2928 * @ctxt: an XML parser context
2929 *
2930 * Parse and return a string between quotes or doublequotes
2931 *
2932 * TODO: Deprecated, to be removed at next drop of binary compatibility
2933 *
2934 * Returns the string parser or NULL.
2935 */
2936xmlChar *
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002937xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)
2938{
Owen Taylor3473f882001-02-23 17:55:21 +00002939 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002940
Owen Taylor3473f882001-02-23 17:55:21 +00002941 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002942 xmlGenericError(xmlGenericErrorContext,
2943 "xmlParseQuotedString() deprecated function reached\n");
2944 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002945 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002946 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002947}
2948
2949/**
2950 * xmlParseNamespace:
2951 * @ctxt: an XML parser context
2952 *
2953 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2954 *
2955 * This is what the older xml-name Working Draft specified, a bunch of
2956 * other stuff may still rely on it, so support is still here as
2957 * if it was declared on the root of the Tree:-(
2958 *
2959 * TODO: remove from library
2960 *
2961 * To be removed at next drop of binary compatibility
2962 */
2963
2964void
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002965xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)
2966{
Owen Taylor3473f882001-02-23 17:55:21 +00002967 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002968
Owen Taylor3473f882001-02-23 17:55:21 +00002969 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002970 xmlGenericError(xmlGenericErrorContext,
2971 "xmlParseNamespace() deprecated function reached\n");
2972 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002973 }
Owen Taylor3473f882001-02-23 17:55:21 +00002974}
2975
2976/**
2977 * xmlScanName:
2978 * @ctxt: an XML parser context
2979 *
2980 * Trickery: parse an XML name but without consuming the input flow
2981 * Needed for rollback cases. Used only when parsing entities references.
2982 *
2983 * TODO: seems deprecated now, only used in the default part of
2984 * xmlParserHandleReference
2985 *
2986 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2987 * CombiningChar | Extender
2988 *
2989 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2990 *
2991 * [6] Names ::= Name (S Name)*
2992 *
2993 * Returns the Name parsed or NULL
2994 */
2995
2996xmlChar *
Daniel Veillard877a7bd2003-09-13 00:16:32 +00002997xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)
2998{
Owen Taylor3473f882001-02-23 17:55:21 +00002999 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003000
Owen Taylor3473f882001-02-23 17:55:21 +00003001 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003002 xmlGenericError(xmlGenericErrorContext,
3003 "xmlScanName() deprecated function reached\n");
3004 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003005 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003006 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003007}
3008
3009/**
3010 * xmlParserHandleReference:
3011 * @ctxt: the parser context
3012 *
3013 * TODO: Remove, now deprecated ... the test is done directly in the
3014 * content parsing
3015 * routines.
3016 *
3017 * [67] Reference ::= EntityRef | CharRef
3018 *
3019 * [68] EntityRef ::= '&' Name ';'
3020 *
3021 * [ WFC: Entity Declared ]
3022 * the Name given in the entity reference must match that in an entity
3023 * declaration, except that well-formed documents need not declare any
3024 * of the following entities: amp, lt, gt, apos, quot.
3025 *
3026 * [ WFC: Parsed Entity ]
3027 * An entity reference must not contain the name of an unparsed entity
3028 *
3029 * [66] CharRef ::= '&#' [0-9]+ ';' |
3030 * '&#x' [0-9a-fA-F]+ ';'
3031 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003032 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003033 * the handling is done accordingly to
3034 * http://www.w3.org/TR/REC-xml#entproc
3035 */
3036void
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003037xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED)
3038{
Owen Taylor3473f882001-02-23 17:55:21 +00003039 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003040
Owen Taylor3473f882001-02-23 17:55:21 +00003041 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003042 xmlGenericError(xmlGenericErrorContext,
3043 "xmlParserHandleReference() deprecated function reached\n");
3044 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003045 }
3046
Owen Taylor3473f882001-02-23 17:55:21 +00003047 return;
3048}
3049
3050/**
3051 * xmlHandleEntity:
3052 * @ctxt: an XML parser context
3053 * @entity: an XML entity pointer.
3054 *
3055 * Default handling of defined entities, when should we define a new input
3056 * stream ? When do we just handle that as a set of chars ?
3057 *
3058 * OBSOLETE: to be removed at some point.
3059 */
3060
3061void
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003062xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
3063 xmlEntityPtr entity ATTRIBUTE_UNUSED)
3064{
Owen Taylor3473f882001-02-23 17:55:21 +00003065 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003066
Owen Taylor3473f882001-02-23 17:55:21 +00003067 if (!deprecated) {
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003068 xmlGenericError(xmlGenericErrorContext,
3069 "xmlHandleEntity() deprecated function reached\n");
3070 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003071 }
Owen Taylor3473f882001-02-23 17:55:21 +00003072}
3073
3074/**
3075 * xmlNewGlobalNs:
3076 * @doc: the document carrying the namespace
3077 * @href: the URI associated
3078 * @prefix: the prefix for the namespace
3079 *
3080 * Creation of a Namespace, the old way using PI and without scoping
3081 * DEPRECATED !!!
3082 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003083 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003084 */
3085xmlNsPtr
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003086xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED,
3087 const xmlChar * href ATTRIBUTE_UNUSED,
3088 const xmlChar * prefix ATTRIBUTE_UNUSED)
3089{
Owen Taylor3473f882001-02-23 17:55:21 +00003090 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003091
Owen Taylor3473f882001-02-23 17:55:21 +00003092 if (!deprecated) {
Owen Taylor3473f882001-02-23 17:55:21 +00003093 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003094 "xmlNewGlobalNs() deprecated function reached\n");
3095 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003096 }
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003097 return (NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003098}
3099
3100/**
3101 * xmlUpgradeOldNs:
3102 * @doc: a document pointer
3103 *
3104 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3105 * DEPRECATED
3106 */
3107void
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003108xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED)
3109{
Owen Taylor3473f882001-02-23 17:55:21 +00003110 static int deprecated = 0;
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003111
Owen Taylor3473f882001-02-23 17:55:21 +00003112 if (!deprecated) {
Owen Taylor3473f882001-02-23 17:55:21 +00003113 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard877a7bd2003-09-13 00:16:32 +00003114 "xmlUpgradeOldNs() deprecated function reached\n");
3115 deprecated = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003116 }
Owen Taylor3473f882001-02-23 17:55:21 +00003117}