blob: 8b7b5151392c2319764f63242adb4d5cbaff90d3 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard2fdbd322003-08-18 12:15:38 +000050#include <libxml/dict.h>
Daniel Veillard16698282001-09-14 10:29:27 +000051#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000052#ifdef LIBXML_CATALOG_ENABLED
53#include <libxml/catalog.h>
54#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000055#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000056
Daniel Veillard56a4cb82001-03-24 17:00:36 +000057void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000058
Daniel Veillarda53c6882001-07-25 17:18:57 +000059/*
60 * Various global defaults for parsing
61 */
Owen Taylor3473f882001-02-23 17:55:21 +000062
Daniel Veillard5e2dace2001-07-18 19:30:27 +000063/**
Owen Taylor3473f882001-02-23 17:55:21 +000064 * xmlCheckVersion:
65 * @version: the include version number
66 *
67 * check the compiled lib version against the include one.
68 * This can warn or immediately kill the application
69 */
70void
71xmlCheckVersion(int version) {
72 int myversion = (int) LIBXML_VERSION;
73
Daniel Veillard6f350292001-10-14 09:56:15 +000074 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000075
Owen Taylor3473f882001-02-23 17:55:21 +000076 if ((myversion / 10000) != (version / 10000)) {
77 xmlGenericError(xmlGenericErrorContext,
78 "Fatal: program compiled against libxml %d using libxml %d\n",
79 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000080 fprintf(stderr,
81 "Fatal: program compiled against libxml %d using libxml %d\n",
82 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000083 }
84 if ((myversion / 100) < (version / 100)) {
85 xmlGenericError(xmlGenericErrorContext,
86 "Warning: program compiled against libxml %d using older %d\n",
87 (version / 100), (myversion / 100));
88 }
89}
90
91
/*
 * Names of the parser features, in the order in which they are
 * reported by xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    /* parser options and state */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
136
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000137/**
Owen Taylor3473f882001-02-23 17:55:21 +0000138 * xmlGetFeaturesList:
139 * @len: the length of the features name array (input/output)
140 * @result: an array of string to be filled with the features name.
141 *
142 * Copy at most *@len feature names into the @result array
143 *
144 * Returns -1 in case or error, or the total number of features,
145 * len is updated with the number of strings copied,
146 * strings must not be deallocated
147 */
148int
149xmlGetFeaturesList(int *len, const char **result) {
150 int ret, i;
151
152 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
153 if ((len == NULL) || (result == NULL))
154 return(ret);
155 if ((*len < 0) || (*len >= 1000))
156 return(-1);
157 if (*len > ret)
158 *len = ret;
159 for (i = 0;i < *len;i++)
160 result[i] = xmlFeaturesList[i];
161 return(ret);
162}
163
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000164/**
Owen Taylor3473f882001-02-23 17:55:21 +0000165 * xmlGetFeature:
166 * @ctxt: an XML/HTML parser context
167 * @name: the feature name
168 * @result: location to store the result
169 *
170 * Read the current value of one feature of this parser instance
171 *
172 * Returns -1 in case or error, 0 otherwise
173 */
174int
175xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
176 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
177 return(-1);
178
179 if (!strcmp(name, "validate")) {
180 *((int *) result) = ctxt->validate;
181 } else if (!strcmp(name, "keep blanks")) {
182 *((int *) result) = ctxt->keepBlanks;
183 } else if (!strcmp(name, "disable SAX")) {
184 *((int *) result) = ctxt->disableSAX;
185 } else if (!strcmp(name, "fetch external entities")) {
186 *((int *) result) = ctxt->loadsubset;
187 } else if (!strcmp(name, "substitute entities")) {
188 *((int *) result) = ctxt->replaceEntities;
189 } else if (!strcmp(name, "gather line info")) {
190 *((int *) result) = ctxt->record_info;
191 } else if (!strcmp(name, "user data")) {
192 *((void **)result) = ctxt->userData;
193 } else if (!strcmp(name, "is html")) {
194 *((int *) result) = ctxt->html;
195 } else if (!strcmp(name, "is standalone")) {
196 *((int *) result) = ctxt->standalone;
197 } else if (!strcmp(name, "document")) {
198 *((xmlDocPtr *) result) = ctxt->myDoc;
199 } else if (!strcmp(name, "is well formed")) {
200 *((int *) result) = ctxt->wellFormed;
201 } else if (!strcmp(name, "is valid")) {
202 *((int *) result) = ctxt->valid;
203 } else if (!strcmp(name, "SAX block")) {
204 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
205 } else if (!strcmp(name, "SAX function internalSubset")) {
206 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
207 } else if (!strcmp(name, "SAX function isStandalone")) {
208 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
209 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
210 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
211 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
212 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
213 } else if (!strcmp(name, "SAX function resolveEntity")) {
214 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
215 } else if (!strcmp(name, "SAX function getEntity")) {
216 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
217 } else if (!strcmp(name, "SAX function entityDecl")) {
218 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
219 } else if (!strcmp(name, "SAX function notationDecl")) {
220 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
221 } else if (!strcmp(name, "SAX function attributeDecl")) {
222 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
223 } else if (!strcmp(name, "SAX function elementDecl")) {
224 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
225 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
226 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
227 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
228 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
229 } else if (!strcmp(name, "SAX function startDocument")) {
230 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
231 } else if (!strcmp(name, "SAX function endDocument")) {
232 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
233 } else if (!strcmp(name, "SAX function startElement")) {
234 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
235 } else if (!strcmp(name, "SAX function endElement")) {
236 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
237 } else if (!strcmp(name, "SAX function reference")) {
238 *((referenceSAXFunc *) result) = ctxt->sax->reference;
239 } else if (!strcmp(name, "SAX function characters")) {
240 *((charactersSAXFunc *) result) = ctxt->sax->characters;
241 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
242 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
243 } else if (!strcmp(name, "SAX function processingInstruction")) {
244 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
245 } else if (!strcmp(name, "SAX function comment")) {
246 *((commentSAXFunc *) result) = ctxt->sax->comment;
247 } else if (!strcmp(name, "SAX function warning")) {
248 *((warningSAXFunc *) result) = ctxt->sax->warning;
249 } else if (!strcmp(name, "SAX function error")) {
250 *((errorSAXFunc *) result) = ctxt->sax->error;
251 } else if (!strcmp(name, "SAX function fatalError")) {
252 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
253 } else if (!strcmp(name, "SAX function getParameterEntity")) {
254 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
255 } else if (!strcmp(name, "SAX function cdataBlock")) {
256 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
257 } else if (!strcmp(name, "SAX function externalSubset")) {
258 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
259 } else {
260 return(-1);
261 }
262 return(0);
263}
264
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000265/**
Owen Taylor3473f882001-02-23 17:55:21 +0000266 * xmlSetFeature:
267 * @ctxt: an XML/HTML parser context
268 * @name: the feature name
269 * @value: pointer to the location of the new value
270 *
271 * Change the current value of one feature of this parser instance
272 *
273 * Returns -1 in case or error, 0 otherwise
274 */
275int
276xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
277 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
278 return(-1);
279
280 if (!strcmp(name, "validate")) {
281 int newvalidate = *((int *) value);
282 if ((!ctxt->validate) && (newvalidate != 0)) {
283 if (ctxt->vctxt.warning == NULL)
284 ctxt->vctxt.warning = xmlParserValidityWarning;
285 if (ctxt->vctxt.error == NULL)
286 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000287 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000288 }
289 ctxt->validate = newvalidate;
290 } else if (!strcmp(name, "keep blanks")) {
291 ctxt->keepBlanks = *((int *) value);
292 } else if (!strcmp(name, "disable SAX")) {
293 ctxt->disableSAX = *((int *) value);
294 } else if (!strcmp(name, "fetch external entities")) {
295 ctxt->loadsubset = *((int *) value);
296 } else if (!strcmp(name, "substitute entities")) {
297 ctxt->replaceEntities = *((int *) value);
298 } else if (!strcmp(name, "gather line info")) {
299 ctxt->record_info = *((int *) value);
300 } else if (!strcmp(name, "user data")) {
301 ctxt->userData = *((void **)value);
302 } else if (!strcmp(name, "is html")) {
303 ctxt->html = *((int *) value);
304 } else if (!strcmp(name, "is standalone")) {
305 ctxt->standalone = *((int *) value);
306 } else if (!strcmp(name, "document")) {
307 ctxt->myDoc = *((xmlDocPtr *) value);
308 } else if (!strcmp(name, "is well formed")) {
309 ctxt->wellFormed = *((int *) value);
310 } else if (!strcmp(name, "is valid")) {
311 ctxt->valid = *((int *) value);
312 } else if (!strcmp(name, "SAX block")) {
313 ctxt->sax = *((xmlSAXHandlerPtr *) value);
314 } else if (!strcmp(name, "SAX function internalSubset")) {
315 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
316 } else if (!strcmp(name, "SAX function isStandalone")) {
317 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
319 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
321 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function resolveEntity")) {
323 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
324 } else if (!strcmp(name, "SAX function getEntity")) {
325 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function entityDecl")) {
327 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
328 } else if (!strcmp(name, "SAX function notationDecl")) {
329 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function attributeDecl")) {
331 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function elementDecl")) {
333 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
335 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
337 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function startDocument")) {
339 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function endDocument")) {
341 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startElement")) {
343 ctxt->sax->startElement = *((startElementSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endElement")) {
345 ctxt->sax->endElement = *((endElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function reference")) {
347 ctxt->sax->reference = *((referenceSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function characters")) {
349 ctxt->sax->characters = *((charactersSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
351 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function processingInstruction")) {
353 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function comment")) {
355 ctxt->sax->comment = *((commentSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function warning")) {
357 ctxt->sax->warning = *((warningSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function error")) {
359 ctxt->sax->error = *((errorSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function fatalError")) {
361 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function getParameterEntity")) {
363 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
364 } else if (!strcmp(name, "SAX function cdataBlock")) {
365 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function externalSubset")) {
367 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
368 } else {
369 return(-1);
370 }
371 return(0);
372}
373
374/************************************************************************
375 * *
376 * Some functions to avoid too large macros *
377 * *
378 ************************************************************************/
379
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are allowed */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fail the lower bound below */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
400
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of those allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
415
/*
 * Lookup table used by xmlIsBaseChar() for the code points 0x00-0xFF:
 * an entry is 1 for 'A'-'Z', 'a'-'z', 0xC0-0xD6, 0xD8-0xF6 and
 * 0xF8-0xFF, and 0 everywhere else.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered from the xmlBaseArray table;
 * the remaining ranges are split in three tiers (below 0x0905, below
 * 0x10A0, and the rest) so that only one group of comparisons is
 * evaluated.  Negative values are never base characters.
 *
 * The comparisons were generated from the REC with:
 *   :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 *   :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* a negative value must not be used as an index in xmlBaseArray */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* first tier: 0x0100 - 0x0904 */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* second tier: 0x0905 - 0x109F */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* third tier: 0x10A0 and above */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
654
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The accepted ranges are kept in a table sorted by increasing code
 * point, which allows the scan to stop at the first range starting
 * after @c.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 }
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(digitRanges) / sizeof(digitRanges[0]); idx++) {
        /* sorted table: nothing further can match */
        if (c < digitRanges[idx][0])
            return(0);
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
684
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted ranges (single code points are stored as a range of
 * one) are kept in a table sorted by increasing code point, which
 * allows the scan to stop at the first range starting after @c.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const int combiningRanges[][2] = {
        { 0x0300, 0x0345 },
        { 0x0360, 0x0361 },
        { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 },
        { 0x05A3, 0x05B9 },
        { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF },
        { 0x05C1, 0x05C2 },
        { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 },
        { 0x0670, 0x0670 },
        { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF },
        { 0x06E0, 0x06E4 },
        { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },
        { 0x0901, 0x0903 },
        { 0x093C, 0x093C },
        { 0x093E, 0x094C },
        { 0x094D, 0x094D },
        { 0x0951, 0x0954 },
        { 0x0962, 0x0963 },
        { 0x0981, 0x0983 },
        { 0x09BC, 0x09BC },
        { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF },
        { 0x09C0, 0x09C4 },
        { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD },
        { 0x09D7, 0x09D7 },
        { 0x09E2, 0x09E3 },
        { 0x0A02, 0x0A02 },
        { 0x0A3C, 0x0A3C },
        { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F },
        { 0x0A40, 0x0A42 },
        { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D },
        { 0x0A70, 0x0A71 },
        { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC },
        { 0x0ABE, 0x0AC5 },
        { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD },
        { 0x0B01, 0x0B03 },
        { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 },
        { 0x0B47, 0x0B48 },
        { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 },
        { 0x0B82, 0x0B83 },
        { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 },
        { 0x0BCA, 0x0BCD },
        { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 },
        { 0x0C3E, 0x0C44 },
        { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D },
        { 0x0C55, 0x0C56 },
        { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 },
        { 0x0CC6, 0x0CC8 },
        { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 },
        { 0x0D02, 0x0D03 },
        { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 },
        { 0x0D4A, 0x0D4D },
        { 0x0D57, 0x0D57 },
        { 0x0E31, 0x0E31 },
        { 0x0E34, 0x0E3A },
        { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 },
        { 0x0EB4, 0x0EB9 },
        { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD },
        { 0x0F18, 0x0F19 },
        { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 },
        { 0x0F39, 0x0F39 },
        { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F },
        { 0x0F71, 0x0F84 },
        { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 },
        { 0x0F97, 0x0F97 },
        { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 },
        { 0x0FB9, 0x0FB9 },
        { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 },
        { 0x302A, 0x302F },
        { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    unsigned int idx;

    for (idx = 0;
         idx < sizeof(combiningRanges) / sizeof(combiningRanges[0]);
         idx++) {
        /* sorted table: nothing further can match */
        if (c < combiningRanges[idx][0])
            return(0);
        if (c <= combiningRanges[idx][1])
            return(1);
    }
    return(0);
}
797
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Tell whether @c belongs to the Extender character class:
 *
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x309D) && (c <= 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* the isolated code points */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
        (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
        (c == 0x0EC6) || (c == 0x3005))
        return(1);

    return(0);
}
822
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; the
 * range 0xF900-0xFA2D which used to be accepted here is not part of
 * production [86] and is therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast exit: nothing below 0x0100 is an ideograph */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
840
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Tell whether @c belongs to the Letter character class:
 *
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first, Ideographic only when that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
854
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Tell whether @c may appear in a public identifier:
 *
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* the three white space code points */
    if ((c == 0x20) || (c == 0x0D) || (c == 0x0A))
        return(1);

    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the punctuation set of the production */
    switch (c) {
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
877
878/************************************************************************
879 * *
880 * Input handling functions for progressive parsing *
881 * *
882 ************************************************************************/
883
884/* #define DEBUG_INPUT */
885/* #define DEBUG_STACK */
886/* #define DEBUG_PUSH */
887
888
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the base and cur pointers of
 * @in are inconsistent with the underlying buffer, then dumps the
 * buffer state.
 */
static void
check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int for %x loses
     * bits where pointers are wider than int. The pointer difference
     * and the counters are cast to the exact type their conversion
     * specifier expects.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
917
918
919/**
920 * xmlParserInputRead:
921 * @in: an XML parser input
922 * @len: an indicative size for the lookahead
923 *
924 * This function refresh the input for the parser. It doesn't try to
925 * preserve pointers to the input buffer, and discard already read data
926 *
927 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
928 * end of this entity
929 */
930int
931xmlParserInputRead(xmlParserInputPtr in, int len) {
932 int ret;
933 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000934 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000935
936#ifdef DEBUG_INPUT
937 xmlGenericError(xmlGenericErrorContext, "Read\n");
938#endif
939 if (in->buf == NULL) return(-1);
940 if (in->base == NULL) return(-1);
941 if (in->cur == NULL) return(-1);
942 if (in->buf->buffer == NULL) return(-1);
943 if (in->buf->readcallback == NULL) return(-1);
944
945 CHECK_BUFFER(in);
946
947 used = in->cur - in->buf->buffer->content;
948 ret = xmlBufferShrink(in->buf->buffer, used);
949 if (ret > 0) {
950 in->cur -= ret;
951 in->consumed += ret;
952 }
953 ret = xmlParserInputBufferRead(in->buf, len);
954 if (in->base != in->buf->buffer->content) {
955 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000956 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000957 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000959 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000960 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000961 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000962 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000963
964 CHECK_BUFFER(in);
965
966 return(ret);
967}
968
969/**
970 * xmlParserInputGrow:
971 * @in: an XML parser input
972 * @len: an indicative size for the lookahead
973 *
974 * This function increase the input for the parser. It tries to
975 * preserve pointers to the input buffer, and keep already read data
976 *
977 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
978 * end of this entity
979 */
980int
981xmlParserInputGrow(xmlParserInputPtr in, int len) {
982 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000983 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000984
985#ifdef DEBUG_INPUT
986 xmlGenericError(xmlGenericErrorContext, "Grow\n");
987#endif
988 if (in->buf == NULL) return(-1);
989 if (in->base == NULL) return(-1);
990 if (in->cur == NULL) return(-1);
991 if (in->buf->buffer == NULL) return(-1);
992
993 CHECK_BUFFER(in);
994
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000995 indx = in->cur - in->base;
996 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000997
998 CHECK_BUFFER(in);
999
1000 return(0);
1001 }
1002 if (in->buf->readcallback != NULL)
1003 ret = xmlParserInputBufferGrow(in->buf, len);
1004 else
1005 return(0);
1006
1007 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001008 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001009 * block, but we use it really as an integer to do some
1010 * pointer arithmetic. Insure will raise it as a bug but in
1011 * that specific case, that's not !
1012 */
1013 if (in->base != in->buf->buffer->content) {
1014 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001015 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001018 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001019 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001020 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001021 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001022
1023 CHECK_BUFFER(in);
1024
1025 return(ret);
1026}
1027
1028/**
1029 * xmlParserInputShrink:
1030 * @in: an XML parser input
1031 *
1032 * This function removes used input for the parser.
1033 */
1034void
1035xmlParserInputShrink(xmlParserInputPtr in) {
1036 int used;
1037 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001038 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001039
1040#ifdef DEBUG_INPUT
1041 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1042#endif
1043 if (in->buf == NULL) return;
1044 if (in->base == NULL) return;
1045 if (in->cur == NULL) return;
1046 if (in->buf->buffer == NULL) return;
1047
1048 CHECK_BUFFER(in);
1049
1050 used = in->cur - in->buf->buffer->content;
1051 /*
1052 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001053 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001054 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001055#if 0
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001056 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001057 return;
Daniel Veillarda880b122003-04-21 21:36:41 +00001058#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001059 if (used > INPUT_CHUNK) {
1060 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1061 if (ret > 0) {
1062 in->cur -= ret;
1063 in->consumed += ret;
1064 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001065 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001066 }
1067
1068 CHECK_BUFFER(in);
1069
1070 if (in->buf->buffer->use > INPUT_CHUNK) {
1071 return;
1072 }
1073 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1074 if (in->base != in->buf->buffer->content) {
1075 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001076 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001079 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001080 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001081 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001082 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001083
1084 CHECK_BUFFER(in);
1085}
1086
1087/************************************************************************
1088 * *
1089 * UTF8 character input and related functions *
1090 * *
1091 ************************************************************************/
1092
1093/**
1094 * xmlNextChar:
1095 * @ctxt: the XML parser context
1096 *
1097 * Skip to the next char input char.
1098 */
1099
1100void
Daniel Veillard77a90a72003-03-22 00:04:05 +00001101xmlNextChar(xmlParserCtxtPtr ctxt)
1102{
Owen Taylor3473f882001-02-23 17:55:21 +00001103 if (ctxt->instate == XML_PARSER_EOF)
Daniel Veillard77a90a72003-03-22 00:04:05 +00001104 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001105
Daniel Veillardfdc91562002-07-01 21:52:03 +00001106 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001107 if ((*ctxt->input->cur == 0) &&
1108 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1109 (ctxt->instate != XML_PARSER_COMMENT)) {
1110 /*
1111 * If we are at the end of the current entity and
1112 * the context allows it, we pop consumed entities
1113 * automatically.
1114 * the auto closing should be blocked in other cases
1115 */
1116 xmlPopInput(ctxt);
1117 } else {
1118 const unsigned char *cur;
1119 unsigned char c;
Owen Taylor3473f882001-02-23 17:55:21 +00001120
Daniel Veillard77a90a72003-03-22 00:04:05 +00001121 /*
1122 * 2.11 End-of-Line Handling
1123 * the literal two-character sequence "#xD#xA" or a standalone
1124 * literal #xD, an XML processor must pass to the application
1125 * the single character #xA.
1126 */
1127 if (*(ctxt->input->cur) == '\n') {
1128 ctxt->input->line++;
1129 ctxt->input->col = 1;
1130 } else
1131 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001132
Daniel Veillard77a90a72003-03-22 00:04:05 +00001133 /*
1134 * We are supposed to handle UTF8, check it's valid
1135 * From rfc2044: encoding of the Unicode values on UTF-8:
1136 *
1137 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1138 * 0000 0000-0000 007F 0xxxxxxx
1139 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1140 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1141 *
1142 * Check for the 0x110000 limit too
1143 */
1144 cur = ctxt->input->cur;
1145
1146 c = *cur;
1147 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001148 if (c == 0xC0)
1149 goto encoding_error;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001150 if (cur[1] == 0)
1151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1152 if ((cur[1] & 0xc0) != 0x80)
1153 goto encoding_error;
1154 if ((c & 0xe0) == 0xe0) {
1155 unsigned int val;
1156
1157 if (cur[2] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if ((cur[2] & 0xc0) != 0x80)
1160 goto encoding_error;
1161 if ((c & 0xf0) == 0xf0) {
1162 if (cur[3] == 0)
1163 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1164 if (((c & 0xf8) != 0xf0) ||
1165 ((cur[3] & 0xc0) != 0x80))
1166 goto encoding_error;
1167 /* 4-byte code */
1168 ctxt->input->cur += 4;
1169 val = (cur[0] & 0x7) << 18;
1170 val |= (cur[1] & 0x3f) << 12;
1171 val |= (cur[2] & 0x3f) << 6;
1172 val |= cur[3] & 0x3f;
1173 } else {
1174 /* 3-byte code */
1175 ctxt->input->cur += 3;
1176 val = (cur[0] & 0xf) << 12;
1177 val |= (cur[1] & 0x3f) << 6;
1178 val |= cur[2] & 0x3f;
1179 }
1180 if (((val > 0xd7ff) && (val < 0xe000)) ||
1181 ((val > 0xfffd) && (val < 0x10000)) ||
1182 (val >= 0x110000)) {
1183 if ((ctxt->sax != NULL) &&
1184 (ctxt->sax->error != NULL))
1185 ctxt->sax->error(ctxt->userData,
1186 "Char 0x%X out of allowed range\n",
1187 val);
1188 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1189 ctxt->wellFormed = 0;
1190 if (ctxt->recovery == 0)
1191 ctxt->disableSAX = 1;
1192 }
1193 } else
1194 /* 2-byte code */
1195 ctxt->input->cur += 2;
1196 } else
1197 /* 1-byte code */
1198 ctxt->input->cur++;
1199
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
Owen Taylor3473f882001-02-23 17:55:21 +00001204 } else {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001205 /*
1206 * Assume it's a fixed length encoding (1) with
1207 * a compatible encoding for the ASCII set, since
1208 * XML constructs only use < 128 chars
1209 */
1210
1211 if (*(ctxt->input->cur) == '\n') {
1212 ctxt->input->line++;
1213 ctxt->input->col = 1;
1214 } else
1215 ctxt->input->col++;
1216 ctxt->input->cur++;
1217 ctxt->nbChars++;
1218 if (*ctxt->input->cur == 0)
1219 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001220 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001221 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001222 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001223 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001224 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
Daniel Veillard77a90a72003-03-22 00:04:05 +00001225 xmlPopInput(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001226 return;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001227 encoding_error:
Owen Taylor3473f882001-02-23 17:55:21 +00001228 /*
1229 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001230 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001231 * declaration header. Report the error and switch the encoding
1232 * to ISO-Latin-1 (if you don't like this policy, just declare the
1233 * encoding !)
1234 */
1235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard77a90a72003-03-22 00:04:05 +00001236 ctxt->sax->error(ctxt->userData,
1237 "Input is not proper UTF-8, indicate encoding !\n");
1238 ctxt->sax->error(ctxt->userData,
1239 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1240 ctxt->input->cur[0], ctxt->input->cur[1],
1241 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001242 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001243 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001244 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1245
Daniel Veillard77a90a72003-03-22 00:04:05 +00001246 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001247 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001248 return;
1249}
1250
1251/**
1252 * xmlCurrentChar:
1253 * @ctxt: the XML parser context
1254 * @len: pointer to the length of the char read
1255 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001256 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001257 * bytes in the input buffer. Implement the end of line normalization:
1258 * 2.11 End-of-Line Handling
1259 * Wherever an external parsed entity or the literal entity value
1260 * of an internal parsed entity contains either the literal two-character
1261 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1262 * must pass to the application the single character #xA.
1263 * This behavior can conveniently be produced by normalizing all
1264 * line breaks to #xA on input, before parsing.)
1265 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001266 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001267 */
1268
1269int
1270xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1271 if (ctxt->instate == XML_PARSER_EOF)
1272 return(0);
1273
Daniel Veillard561b7f82002-03-20 21:55:57 +00001274 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1275 *len = 1;
1276 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001277 }
1278 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1279 /*
1280 * We are supposed to handle UTF8, check it's valid
1281 * From rfc2044: encoding of the Unicode values on UTF-8:
1282 *
1283 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1284 * 0000 0000-0000 007F 0xxxxxxx
1285 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1286 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1287 *
1288 * Check for the 0x110000 limit too
1289 */
1290 const unsigned char *cur = ctxt->input->cur;
1291 unsigned char c;
1292 unsigned int val;
1293
1294 c = *cur;
1295 if (c & 0x80) {
Daniel Veillard0e0f37a2003-05-20 12:22:41 +00001296 if (c == 0xC0)
1297 goto encoding_error;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001298 if (cur[1] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001301 goto encoding_error;
1302 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001303
1304 if (cur[2] == 0)
1305 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1306 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001307 goto encoding_error;
1308 if ((c & 0xf0) == 0xf0) {
1309 if (cur[3] == 0)
1310 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001311 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001312 ((cur[3] & 0xc0) != 0x80))
1313 goto encoding_error;
1314 /* 4-byte code */
1315 *len = 4;
1316 val = (cur[0] & 0x7) << 18;
1317 val |= (cur[1] & 0x3f) << 12;
1318 val |= (cur[2] & 0x3f) << 6;
1319 val |= cur[3] & 0x3f;
1320 } else {
1321 /* 3-byte code */
1322 *len = 3;
1323 val = (cur[0] & 0xf) << 12;
1324 val |= (cur[1] & 0x3f) << 6;
1325 val |= cur[2] & 0x3f;
1326 }
1327 } else {
1328 /* 2-byte code */
1329 *len = 2;
1330 val = (cur[0] & 0x1f) << 6;
1331 val |= cur[1] & 0x3f;
1332 }
1333 if (!IS_CHAR(val)) {
1334 if ((ctxt->sax != NULL) &&
1335 (ctxt->sax->error != NULL))
1336 ctxt->sax->error(ctxt->userData,
1337 "Char 0x%X out of allowed range\n", val);
1338 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001341 }
1342 return(val);
1343 } else {
1344 /* 1-byte code */
1345 *len = 1;
1346 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001347 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001348 ctxt->nbChars++;
1349 ctxt->input->cur++;
1350 }
1351 return(0xA);
1352 }
1353 return((int) *ctxt->input->cur);
1354 }
1355 }
1356 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001357 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001358 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001359 * XML constructs only use < 128 chars
1360 */
1361 *len = 1;
1362 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001363 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001364 ctxt->nbChars++;
1365 ctxt->input->cur++;
1366 }
1367 return(0xA);
1368 }
1369 return((int) *ctxt->input->cur);
1370encoding_error:
1371 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001372 * An encoding problem may arise from a truncated input buffer
1373 * splitting a character in the middle. In that case do not raise
1374 * an error but return 0 to endicate an end of stream problem
1375 */
1376 if (ctxt->input->end - ctxt->input->cur < 4) {
1377 *len = 0;
1378 return(0);
1379 }
1380
1381 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001382 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001383 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001384 * declaration header. Report the error and switch the encoding
1385 * to ISO-Latin-1 (if you don't like this policy, just declare the
1386 * encoding !)
1387 */
1388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1389 ctxt->sax->error(ctxt->userData,
1390 "Input is not proper UTF-8, indicate encoding !\n");
1391 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001392 ctxt->input->cur[0], ctxt->input->cur[1],
1393 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001394 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001395 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001396 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1397
1398 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1399 *len = 1;
1400 return((int) *ctxt->input->cur);
1401}
1402
1403/**
1404 * xmlStringCurrentChar:
1405 * @ctxt: the XML parser context
1406 * @cur: pointer to the beginning of the char
1407 * @len: pointer to the length of the char read
1408 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001409 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001410 * bytes in the input buffer.
1411 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001412 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001413 */
1414
1415int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001416xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1417{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001418 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001419 /*
1420 * We are supposed to handle UTF8, check it's valid
1421 * From rfc2044: encoding of the Unicode values on UTF-8:
1422 *
1423 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1424 * 0000 0000-0000 007F 0xxxxxxx
1425 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1426 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1427 *
1428 * Check for the 0x110000 limit too
1429 */
1430 unsigned char c;
1431 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001432
Daniel Veillardd8224e02002-01-13 15:43:22 +00001433 c = *cur;
1434 if (c & 0x80) {
1435 if ((cur[1] & 0xc0) != 0x80)
1436 goto encoding_error;
1437 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001438
Daniel Veillardd8224e02002-01-13 15:43:22 +00001439 if ((cur[2] & 0xc0) != 0x80)
1440 goto encoding_error;
1441 if ((c & 0xf0) == 0xf0) {
1442 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1443 goto encoding_error;
1444 /* 4-byte code */
1445 *len = 4;
1446 val = (cur[0] & 0x7) << 18;
1447 val |= (cur[1] & 0x3f) << 12;
1448 val |= (cur[2] & 0x3f) << 6;
1449 val |= cur[3] & 0x3f;
1450 } else {
1451 /* 3-byte code */
1452 *len = 3;
1453 val = (cur[0] & 0xf) << 12;
1454 val |= (cur[1] & 0x3f) << 6;
1455 val |= cur[2] & 0x3f;
1456 }
1457 } else {
1458 /* 2-byte code */
1459 *len = 2;
1460 val = (cur[0] & 0x1f) << 6;
1461 val |= cur[1] & 0x3f;
1462 }
1463 if (!IS_CHAR(val)) {
1464 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1465 (ctxt->sax->error != NULL))
1466 ctxt->sax->error(ctxt->userData,
1467 "Char 0x%X out of allowed range\n",
1468 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001469 if (ctxt != NULL) {
1470 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1471 ctxt->wellFormed = 0;
1472 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1473 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001474 }
1475 return (val);
1476 } else {
1477 /* 1-byte code */
1478 *len = 1;
1479 return ((int) *cur);
1480 }
Owen Taylor3473f882001-02-23 17:55:21 +00001481 }
1482 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001483 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001484 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001485 * XML constructs only use < 128 chars
1486 */
1487 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001488 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001489encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001490
Owen Taylor3473f882001-02-23 17:55:21 +00001491 /*
1492 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001493 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001494 * declaration header. Report the error and switch the encoding
1495 * to ISO-Latin-1 (if you don't like this policy, just declare the
1496 * encoding !)
1497 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001498 if (ctxt != NULL) {
1499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1500 ctxt->sax->error(ctxt->userData,
1501 "Input is not proper UTF-8, indicate encoding !\n");
1502 ctxt->sax->error(ctxt->userData,
1503 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1504 ctxt->input->cur[0], ctxt->input->cur[1],
1505 ctxt->input->cur[2], ctxt->input->cur[3]);
1506 }
1507 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001508 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001509 }
Owen Taylor3473f882001-02-23 17:55:21 +00001510
1511 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001512 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001513}
1514
1515/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001516 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001517 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001518 * @val: the char value
1519 *
1520 * append the char value in the array
1521 *
1522 * Returns the number of xmlChar written
1523 */
Owen Taylor3473f882001-02-23 17:55:21 +00001524int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001525xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001526 /*
1527 * We are supposed to handle UTF8, check it's valid
1528 * From rfc2044: encoding of the Unicode values on UTF-8:
1529 *
1530 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1531 * 0000 0000-0000 007F 0xxxxxxx
1532 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1533 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1534 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001535 if (val >= 0x80) {
1536 xmlChar *savedout = out;
1537 int bits;
1538 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1539 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1540 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1541 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001542 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001543 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001544 val);
1545 return(0);
1546 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001547 for ( ; bits >= 0; bits-= 6)
1548 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1549 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001550 }
1551 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001552 return 1;
1553}
1554
1555/**
1556 * xmlCopyChar:
1557 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001558 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001559 * @val: the char value
1560 *
1561 * append the char value in the array
1562 *
1563 * Returns the number of xmlChar written
1564 */
1565
1566int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001567xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001568 /* the len parameter is ignored */
1569 if (val >= 0x80) {
1570 return(xmlCopyCharMultiByte (out, val));
1571 }
1572 *out = (xmlChar) val;
1573 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001574}
1575
1576/************************************************************************
1577 * *
1578 * Commodity functions to switch encodings *
1579 * *
1580 ************************************************************************/
1581
1582/**
1583 * xmlSwitchEncoding:
1584 * @ctxt: the parser context
1585 * @enc: the encoding value (number)
1586 *
1587 * change the input functions when discovering the character encoding
1588 * of a given entity.
1589 *
1590 * Returns 0 in case of success, -1 otherwise
1591 */
1592int
1593xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1594{
1595 xmlCharEncodingHandlerPtr handler;
1596
1597 switch (enc) {
1598 case XML_CHAR_ENCODING_ERROR:
1599 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1601 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1602 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001604 break;
1605 case XML_CHAR_ENCODING_NONE:
1606 /* let's assume it's UTF-8 without the XML decl */
1607 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1608 return(0);
1609 case XML_CHAR_ENCODING_UTF8:
1610 /* default encoding, no conversion should be needed */
1611 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001612
1613 /*
1614 * Errata on XML-1.0 June 20 2001
1615 * Specific handling of the Byte Order Mark for
1616 * UTF-8
1617 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001618 if ((ctxt->input != NULL) &&
1619 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001620 (ctxt->input->cur[1] == 0xBB) &&
1621 (ctxt->input->cur[2] == 0xBF)) {
1622 ctxt->input->cur += 3;
1623 }
Owen Taylor3473f882001-02-23 17:55:21 +00001624 return(0);
Daniel Veillard2dcb9372003-07-16 21:18:19 +00001625 case XML_CHAR_ENCODING_UTF16LE:
1626 case XML_CHAR_ENCODING_UTF16BE:
1627 /*The raw input characters are encoded
1628 *in UTF-16. As we expect this function
1629 *to be called after xmlCharEncInFunc, we expect
1630 *ctxt->input->cur to contain UTF-8 encoded characters.
1631 *So the raw UTF16 Byte Order Mark
1632 *has also been converted into
1633 *an UTF-8 BOM. Let's skip that BOM.
1634 */
1635 if ((ctxt->input != NULL) &&
1636 (ctxt->input->cur[0] == 0xEF) &&
1637 (ctxt->input->cur[1] == 0xBB) &&
1638 (ctxt->input->cur[2] == 0xBF)) {
1639 ctxt->input->cur += 3;
1640 }
1641 break ;
Owen Taylor3473f882001-02-23 17:55:21 +00001642 default:
1643 break;
1644 }
1645 handler = xmlGetCharEncodingHandler(enc);
1646 if (handler == NULL) {
1647 /*
1648 * Default handlers.
1649 */
1650 switch (enc) {
1651 case XML_CHAR_ENCODING_ERROR:
1652 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1655 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001656 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001657 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1658 break;
1659 case XML_CHAR_ENCODING_NONE:
1660 /* let's assume it's UTF-8 without the XML decl */
1661 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1662 return(0);
1663 case XML_CHAR_ENCODING_UTF8:
1664 case XML_CHAR_ENCODING_ASCII:
1665 /* default encoding, no conversion should be needed */
1666 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1667 return(0);
1668 case XML_CHAR_ENCODING_UTF16LE:
1669 break;
1670 case XML_CHAR_ENCODING_UTF16BE:
1671 break;
1672 case XML_CHAR_ENCODING_UCS4LE:
1673 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1675 ctxt->sax->error(ctxt->userData,
1676 "char encoding USC4 little endian not supported\n");
1677 break;
1678 case XML_CHAR_ENCODING_UCS4BE:
1679 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1681 ctxt->sax->error(ctxt->userData,
1682 "char encoding USC4 big endian not supported\n");
1683 break;
1684 case XML_CHAR_ENCODING_EBCDIC:
1685 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1687 ctxt->sax->error(ctxt->userData,
1688 "char encoding EBCDIC not supported\n");
1689 break;
1690 case XML_CHAR_ENCODING_UCS4_2143:
1691 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1693 ctxt->sax->error(ctxt->userData,
1694 "char encoding UCS4 2143 not supported\n");
1695 break;
1696 case XML_CHAR_ENCODING_UCS4_3412:
1697 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1699 ctxt->sax->error(ctxt->userData,
1700 "char encoding UCS4 3412 not supported\n");
1701 break;
1702 case XML_CHAR_ENCODING_UCS2:
1703 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1705 ctxt->sax->error(ctxt->userData,
1706 "char encoding UCS2 not supported\n");
1707 break;
1708 case XML_CHAR_ENCODING_8859_1:
1709 case XML_CHAR_ENCODING_8859_2:
1710 case XML_CHAR_ENCODING_8859_3:
1711 case XML_CHAR_ENCODING_8859_4:
1712 case XML_CHAR_ENCODING_8859_5:
1713 case XML_CHAR_ENCODING_8859_6:
1714 case XML_CHAR_ENCODING_8859_7:
1715 case XML_CHAR_ENCODING_8859_8:
1716 case XML_CHAR_ENCODING_8859_9:
1717 /*
1718 * We used to keep the internal content in the
1719 * document encoding however this turns being unmaintainable
1720 * So xmlGetCharEncodingHandler() will return non-null
1721 * values for this now.
1722 */
1723 if ((ctxt->inputNr == 1) &&
1724 (ctxt->encoding == NULL) &&
1725 (ctxt->input->encoding != NULL)) {
1726 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1727 }
1728 ctxt->charset = enc;
1729 return(0);
1730 case XML_CHAR_ENCODING_2022_JP:
1731 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1733 ctxt->sax->error(ctxt->userData,
1734 "char encoding ISO-2022-JPnot supported\n");
1735 break;
1736 case XML_CHAR_ENCODING_SHIFT_JIS:
1737 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1739 ctxt->sax->error(ctxt->userData,
1740 "char encoding Shift_JIS not supported\n");
1741 break;
1742 case XML_CHAR_ENCODING_EUC_JP:
1743 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1745 ctxt->sax->error(ctxt->userData,
1746 "char encoding EUC-JPnot supported\n");
1747 break;
1748 }
1749 }
1750 if (handler == NULL)
1751 return(-1);
1752 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1753 return(xmlSwitchToEncoding(ctxt, handler));
1754}
1755
1756/**
1757 * xmlSwitchToEncoding:
1758 * @ctxt: the parser context
1759 * @handler: the encoding handler
1760 *
1761 * change the input functions when discovering the character encoding
1762 * of a given entity.
1763 *
1764 * Returns 0 in case of success, -1 otherwise
1765 */
1766int
1767xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1768{
1769 int nbchars;
1770
1771 if (handler != NULL) {
1772 if (ctxt->input != NULL) {
1773 if (ctxt->input->buf != NULL) {
1774 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001775 /*
1776 * Check in case the auto encoding detetection triggered
1777 * in already.
1778 */
Owen Taylor3473f882001-02-23 17:55:21 +00001779 if (ctxt->input->buf->encoder == handler)
1780 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001781
1782 /*
1783 * "UTF-16" can be used for both LE and BE
Daniel Veillard878eab02002-02-19 13:46:09 +00001784 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1785 BAD_CAST "UTF-16", 6)) &&
1786 (!xmlStrncmp(BAD_CAST handler->name,
1787 BAD_CAST "UTF-16", 6))) {
1788 return(0);
1789 }
Daniel Veillarda6874ca2003-07-29 16:47:24 +00001790 */
Daniel Veillard878eab02002-02-19 13:46:09 +00001791
Owen Taylor3473f882001-02-23 17:55:21 +00001792 /*
1793 * Note: this is a bit dangerous, but that's what it
1794 * takes to use nearly compatible signature for different
1795 * encodings.
1796 */
1797 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1798 ctxt->input->buf->encoder = handler;
1799 return(0);
1800 }
1801 ctxt->input->buf->encoder = handler;
1802
1803 /*
1804 * Is there already some content down the pipe to convert ?
1805 */
1806 if ((ctxt->input->buf->buffer != NULL) &&
1807 (ctxt->input->buf->buffer->use > 0)) {
1808 int processed;
1809
1810 /*
1811 * Specific handling of the Byte Order Mark for
1812 * UTF-16
1813 */
1814 if ((handler->name != NULL) &&
1815 (!strcmp(handler->name, "UTF-16LE")) &&
1816 (ctxt->input->cur[0] == 0xFF) &&
1817 (ctxt->input->cur[1] == 0xFE)) {
1818 ctxt->input->cur += 2;
1819 }
1820 if ((handler->name != NULL) &&
1821 (!strcmp(handler->name, "UTF-16BE")) &&
1822 (ctxt->input->cur[0] == 0xFE) &&
1823 (ctxt->input->cur[1] == 0xFF)) {
1824 ctxt->input->cur += 2;
1825 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001826 /*
1827 * Errata on XML-1.0 June 20 2001
1828 * Specific handling of the Byte Order Mark for
1829 * UTF-8
1830 */
1831 if ((handler->name != NULL) &&
1832 (!strcmp(handler->name, "UTF-8")) &&
1833 (ctxt->input->cur[0] == 0xEF) &&
1834 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001835 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001836 ctxt->input->cur += 3;
1837 }
Owen Taylor3473f882001-02-23 17:55:21 +00001838
1839 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001840 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001841 * Move it as the raw buffer and create a new input buffer
1842 */
1843 processed = ctxt->input->cur - ctxt->input->base;
1844 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1845 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1846 ctxt->input->buf->buffer = xmlBufferCreate();
1847
1848 if (ctxt->html) {
1849 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001850 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001851 */
1852 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1853 ctxt->input->buf->buffer,
1854 ctxt->input->buf->raw);
1855 } else {
1856 /*
1857 * convert just enough to get
1858 * '<?xml version="1.0" encoding="xxx"?>'
1859 * parsed with the autodetected encoding
1860 * into the parser reading buffer.
1861 */
1862 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1863 ctxt->input->buf->buffer,
1864 ctxt->input->buf->raw);
1865 }
1866 if (nbchars < 0) {
1867 xmlGenericError(xmlGenericErrorContext,
1868 "xmlSwitchToEncoding: encoder error\n");
1869 return(-1);
1870 }
1871 ctxt->input->base =
1872 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001873 ctxt->input->end =
1874 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001875
1876 }
1877 return(0);
1878 } else {
1879 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1880 /*
1881 * When parsing a static memory array one must know the
1882 * size to be able to convert the buffer.
1883 */
1884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1885 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001886 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001887 return(-1);
1888 } else {
1889 int processed;
1890
1891 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001892 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001893 * Move it as the raw buffer and create a new input buffer
1894 */
1895 processed = ctxt->input->cur - ctxt->input->base;
1896
1897 ctxt->input->buf->raw = xmlBufferCreate();
1898 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1899 ctxt->input->length - processed);
1900 ctxt->input->buf->buffer = xmlBufferCreate();
1901
1902 /*
1903 * convert as much as possible of the raw input
1904 * to the parser reading buffer.
1905 */
1906 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1907 ctxt->input->buf->buffer,
1908 ctxt->input->buf->raw);
1909 if (nbchars < 0) {
1910 xmlGenericError(xmlGenericErrorContext,
1911 "xmlSwitchToEncoding: encoder error\n");
1912 return(-1);
1913 }
1914
1915 /*
1916 * Conversion succeeded, get rid of the old buffer
1917 */
1918 if ((ctxt->input->free != NULL) &&
1919 (ctxt->input->base != NULL))
1920 ctxt->input->free((xmlChar *) ctxt->input->base);
1921 ctxt->input->base =
1922 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001923 ctxt->input->end =
1924 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 }
1927 } else {
1928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1929 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001930 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001931 return(-1);
1932 }
1933 /*
1934 * The parsing is now done in UTF8 natively
1935 */
1936 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1937 } else
1938 return(-1);
1939 return(0);
1940
1941}
1942
1943/************************************************************************
1944 * *
1945 * Commodity functions to handle entities processing *
1946 * *
1947 ************************************************************************/
1948
1949/**
1950 * xmlFreeInputStream:
1951 * @input: an xmlParserInputPtr
1952 *
1953 * Free up an input stream.
1954 */
1955void
1956xmlFreeInputStream(xmlParserInputPtr input) {
1957 if (input == NULL) return;
1958
1959 if (input->filename != NULL) xmlFree((char *) input->filename);
1960 if (input->directory != NULL) xmlFree((char *) input->directory);
1961 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1962 if (input->version != NULL) xmlFree((char *) input->version);
1963 if ((input->free != NULL) && (input->base != NULL))
1964 input->free((xmlChar *) input->base);
1965 if (input->buf != NULL)
1966 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001967 xmlFree(input);
1968}
1969
1970/**
1971 * xmlNewInputStream:
1972 * @ctxt: an XML parser context
1973 *
1974 * Create a new input stream structure
1975 * Returns the new input stream or NULL
1976 */
1977xmlParserInputPtr
1978xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1979 xmlParserInputPtr input;
1980
1981 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1982 if (input == NULL) {
1983 if (ctxt != NULL) {
1984 ctxt->errNo = XML_ERR_NO_MEMORY;
1985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1986 ctxt->sax->error(ctxt->userData,
1987 "malloc: couldn't allocate a new input stream\n");
1988 ctxt->errNo = XML_ERR_NO_MEMORY;
1989 }
1990 return(NULL);
1991 }
1992 memset(input, 0, sizeof(xmlParserInput));
1993 input->line = 1;
1994 input->col = 1;
1995 input->standalone = -1;
1996 return(input);
1997}
1998
1999/**
2000 * xmlNewIOInputStream:
2001 * @ctxt: an XML parser context
2002 * @input: an I/O Input
2003 * @enc: the charset encoding if known
2004 *
2005 * Create a new input stream structure encapsulating the @input into
2006 * a stream suitable for the parser.
2007 *
2008 * Returns the new input stream or NULL
2009 */
2010xmlParserInputPtr
2011xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
2012 xmlCharEncoding enc) {
2013 xmlParserInputPtr inputStream;
2014
2015 if (xmlParserDebugEntities)
2016 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
2017 inputStream = xmlNewInputStream(ctxt);
2018 if (inputStream == NULL) {
2019 return(NULL);
2020 }
2021 inputStream->filename = NULL;
2022 inputStream->buf = input;
2023 inputStream->base = inputStream->buf->buffer->content;
2024 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002025 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002026 if (enc != XML_CHAR_ENCODING_NONE) {
2027 xmlSwitchEncoding(ctxt, enc);
2028 }
2029
2030 return(inputStream);
2031}
2032
2033/**
2034 * xmlNewEntityInputStream:
2035 * @ctxt: an XML parser context
2036 * @entity: an Entity pointer
2037 *
2038 * Create a new input stream based on an xmlEntityPtr
2039 *
2040 * Returns the new input stream or NULL
2041 */
2042xmlParserInputPtr
2043xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2044 xmlParserInputPtr input;
2045
2046 if (entity == NULL) {
2047 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2049 ctxt->sax->error(ctxt->userData,
2050 "internal: xmlNewEntityInputStream entity = NULL\n");
2051 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2052 return(NULL);
2053 }
2054 if (xmlParserDebugEntities)
2055 xmlGenericError(xmlGenericErrorContext,
2056 "new input from entity: %s\n", entity->name);
2057 if (entity->content == NULL) {
2058 switch (entity->etype) {
2059 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2060 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2062 ctxt->sax->error(ctxt->userData,
2063 "xmlNewEntityInputStream unparsed entity !\n");
2064 break;
2065 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2066 case XML_EXTERNAL_PARAMETER_ENTITY:
2067 return(xmlLoadExternalEntity((char *) entity->URI,
2068 (char *) entity->ExternalID, ctxt));
2069 case XML_INTERNAL_GENERAL_ENTITY:
2070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2071 ctxt->sax->error(ctxt->userData,
2072 "Internal entity %s without content !\n", entity->name);
2073 break;
2074 case XML_INTERNAL_PARAMETER_ENTITY:
2075 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2077 ctxt->sax->error(ctxt->userData,
2078 "Internal parameter entity %s without content !\n", entity->name);
2079 break;
2080 case XML_INTERNAL_PREDEFINED_ENTITY:
2081 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2083 ctxt->sax->error(ctxt->userData,
2084 "Predefined entity %s without content !\n", entity->name);
2085 break;
2086 }
2087 return(NULL);
2088 }
2089 input = xmlNewInputStream(ctxt);
2090 if (input == NULL) {
2091 return(NULL);
2092 }
2093 input->filename = (char *) entity->URI;
2094 input->base = entity->content;
2095 input->cur = entity->content;
2096 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002097 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002098 return(input);
2099}
2100
2101/**
2102 * xmlNewStringInputStream:
2103 * @ctxt: an XML parser context
2104 * @buffer: an memory buffer
2105 *
2106 * Create a new input stream based on a memory buffer.
2107 * Returns the new input stream
2108 */
2109xmlParserInputPtr
2110xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2111 xmlParserInputPtr input;
2112
2113 if (buffer == NULL) {
2114 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2116 ctxt->sax->error(ctxt->userData,
2117 "internal: xmlNewStringInputStream string = NULL\n");
2118 return(NULL);
2119 }
2120 if (xmlParserDebugEntities)
2121 xmlGenericError(xmlGenericErrorContext,
2122 "new fixed input: %.30s\n", buffer);
2123 input = xmlNewInputStream(ctxt);
2124 if (input == NULL) {
2125 return(NULL);
2126 }
2127 input->base = buffer;
2128 input->cur = buffer;
2129 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002130 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002131 return(input);
2132}
2133
2134/**
2135 * xmlNewInputFromFile:
2136 * @ctxt: an XML parser context
2137 * @filename: the filename to use as entity
2138 *
2139 * Create a new input stream based on a file.
2140 *
2141 * Returns the new input stream or NULL in case of error
2142 */
2143xmlParserInputPtr
2144xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2145 xmlParserInputBufferPtr buf;
2146 xmlParserInputPtr inputStream;
2147 char *directory = NULL;
2148 xmlChar *URI = NULL;
2149
2150 if (xmlParserDebugEntities)
2151 xmlGenericError(xmlGenericErrorContext,
2152 "new input from file: %s\n", filename);
2153 if (ctxt == NULL) return(NULL);
2154 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2155 if (buf == NULL)
2156 return(NULL);
2157
2158 URI = xmlStrdup((xmlChar *) filename);
2159 directory = xmlParserGetDirectory((const char *) URI);
2160
2161 inputStream = xmlNewInputStream(ctxt);
2162 if (inputStream == NULL) {
2163 if (directory != NULL) xmlFree((char *) directory);
2164 if (URI != NULL) xmlFree((char *) URI);
2165 return(NULL);
2166 }
2167
2168 inputStream->filename = (const char *) URI;
2169 inputStream->directory = directory;
2170 inputStream->buf = buf;
2171
2172 inputStream->base = inputStream->buf->buffer->content;
2173 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002174 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002175 if ((ctxt->directory == NULL) && (directory != NULL))
2176 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2177 return(inputStream);
2178}
2179
2180/************************************************************************
2181 * *
2182 * Commodity functions to handle parser contexts *
2183 * *
2184 ************************************************************************/
2185
2186/**
2187 * xmlInitParserCtxt:
2188 * @ctxt: an XML parser context
2189 *
2190 * Initialize a parser context
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002191 *
2192 * Returns 0 in case of success and -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +00002193 */
2194
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002195int
Owen Taylor3473f882001-02-23 17:55:21 +00002196xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2197{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002198 if(ctxt==NULL) {
2199 xmlGenericError(xmlGenericErrorContext,
2200 "xmlInitParserCtxt: NULL context given\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002201 return(-1);
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002202 }
2203
Owen Taylor3473f882001-02-23 17:55:21 +00002204 xmlDefaultSAXHandlerInit();
2205
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002206 ctxt->dict = xmlDictCreate();
2207 if (ctxt->dict == NULL) {
2208 xmlGenericError(xmlGenericErrorContext,
2209 "xmlInitParserCtxt: out of memory\n");
2210 return(-1);
2211 }
William M. Brack8b2c7f12002-11-22 05:07:29 +00002212 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2213 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002214 xmlGenericError(xmlGenericErrorContext,
2215 "xmlInitParserCtxt: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002216 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002217 }
2218 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002219 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002220
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002221 ctxt->maxatts = 0;
2222 ctxt->atts = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002223 /* Allocate the Input stack */
2224 ctxt->inputTab = (xmlParserInputPtr *)
2225 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2226 if (ctxt->inputTab == NULL) {
2227 xmlGenericError(xmlGenericErrorContext,
2228 "xmlInitParserCtxt: out of memory\n");
2229 ctxt->inputNr = 0;
2230 ctxt->inputMax = 0;
2231 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002232 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002233 }
2234 ctxt->inputNr = 0;
2235 ctxt->inputMax = 5;
2236 ctxt->input = NULL;
2237
2238 ctxt->version = NULL;
2239 ctxt->encoding = NULL;
2240 ctxt->standalone = -1;
2241 ctxt->hasExternalSubset = 0;
2242 ctxt->hasPErefs = 0;
2243 ctxt->html = 0;
2244 ctxt->external = 0;
2245 ctxt->instate = XML_PARSER_START;
2246 ctxt->token = 0;
2247 ctxt->directory = NULL;
2248
2249 /* Allocate the Node stack */
2250 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2251 if (ctxt->nodeTab == NULL) {
2252 xmlGenericError(xmlGenericErrorContext,
2253 "xmlInitParserCtxt: out of memory\n");
2254 ctxt->nodeNr = 0;
2255 ctxt->nodeMax = 0;
2256 ctxt->node = NULL;
2257 ctxt->inputNr = 0;
2258 ctxt->inputMax = 0;
2259 ctxt->input = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002260 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002261 }
2262 ctxt->nodeNr = 0;
2263 ctxt->nodeMax = 10;
2264 ctxt->node = NULL;
2265
2266 /* Allocate the Name stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002267 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00002268 if (ctxt->nameTab == NULL) {
2269 xmlGenericError(xmlGenericErrorContext,
2270 "xmlInitParserCtxt: out of memory\n");
2271 ctxt->nodeNr = 0;
2272 ctxt->nodeMax = 0;
2273 ctxt->node = NULL;
2274 ctxt->inputNr = 0;
2275 ctxt->inputMax = 0;
2276 ctxt->input = NULL;
2277 ctxt->nameNr = 0;
2278 ctxt->nameMax = 0;
2279 ctxt->name = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002280 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002281 }
2282 ctxt->nameNr = 0;
2283 ctxt->nameMax = 10;
2284 ctxt->name = NULL;
2285
2286 /* Allocate the space stack */
2287 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2288 if (ctxt->spaceTab == NULL) {
2289 xmlGenericError(xmlGenericErrorContext,
2290 "xmlInitParserCtxt: out of memory\n");
2291 ctxt->nodeNr = 0;
2292 ctxt->nodeMax = 0;
2293 ctxt->node = NULL;
2294 ctxt->inputNr = 0;
2295 ctxt->inputMax = 0;
2296 ctxt->input = NULL;
2297 ctxt->nameNr = 0;
2298 ctxt->nameMax = 0;
2299 ctxt->name = NULL;
2300 ctxt->spaceNr = 0;
2301 ctxt->spaceMax = 0;
2302 ctxt->space = NULL;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002303 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 }
2305 ctxt->spaceNr = 1;
2306 ctxt->spaceMax = 10;
2307 ctxt->spaceTab[0] = -1;
2308 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002309 ctxt->userData = ctxt;
2310 ctxt->myDoc = NULL;
2311 ctxt->wellFormed = 1;
2312 ctxt->valid = 1;
2313 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2314 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2315 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002316 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002317 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002318 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002319 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002320
Owen Taylor3473f882001-02-23 17:55:21 +00002321 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002322 ctxt->vctxt.error = xmlParserValidityError;
2323 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002324 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002325 if (xmlGetWarningsDefaultValue == 0)
2326 ctxt->vctxt.warning = NULL;
2327 else
2328 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002329 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002330 }
2331 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2332 ctxt->record_info = 0;
2333 ctxt->nbChars = 0;
2334 ctxt->checkIndex = 0;
2335 ctxt->inSubset = 0;
2336 ctxt->errNo = XML_ERR_OK;
2337 ctxt->depth = 0;
2338 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002339 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002340 xmlInitNodeInfoSeq(&ctxt->node_seq);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002341 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002342}
2343
2344/**
2345 * xmlFreeParserCtxt:
2346 * @ctxt: an XML parser context
2347 *
2348 * Free all the memory used by a parser context. However the parsed
2349 * document in ctxt->myDoc is not freed.
2350 */
2351
2352void
2353xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2354{
2355 xmlParserInputPtr input;
Owen Taylor3473f882001-02-23 17:55:21 +00002356
2357 if (ctxt == NULL) return;
2358
2359 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2360 xmlFreeInputStream(input);
2361 }
Owen Taylor3473f882001-02-23 17:55:21 +00002362 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
Igor Zlatkovicd37c1392003-08-28 10:34:33 +00002363 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
Owen Taylor3473f882001-02-23 17:55:21 +00002364 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2365 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2366 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2367 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00002368 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2369 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002370 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2371 xmlFree(ctxt->sax);
2372 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002373 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Igor Zlatkovicd37c1392003-08-28 10:34:33 +00002374 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002375 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002376 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002377 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2378 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2379 if (ctxt->attsDefault != NULL)
2380 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002381 if (ctxt->attsSpecial != NULL)
2382 xmlHashFree(ctxt->attsSpecial, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002383
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002384#ifdef LIBXML_CATALOG_ENABLED
2385 if (ctxt->catalogs != NULL)
2386 xmlCatalogFreeLocal(ctxt->catalogs);
2387#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002388 xmlFree(ctxt);
2389}
2390
2391/**
2392 * xmlNewParserCtxt:
2393 *
2394 * Allocate and initialize a new parser context.
2395 *
2396 * Returns the xmlParserCtxtPtr or NULL
2397 */
2398
2399xmlParserCtxtPtr
2400xmlNewParserCtxt()
2401{
2402 xmlParserCtxtPtr ctxt;
2403
2404 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2405 if (ctxt == NULL) {
2406 xmlGenericError(xmlGenericErrorContext,
2407 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002408 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002409 return(NULL);
2410 }
2411 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002412 if (xmlInitParserCtxt(ctxt) < 0) {
2413 xmlFreeParserCtxt(ctxt);
2414 return(NULL);
2415 }
Owen Taylor3473f882001-02-23 17:55:21 +00002416 return(ctxt);
2417}
2418
/************************************************************************
 *									*
 *		Handling of node information				*
 *									*
 ************************************************************************/
2424
2425/**
2426 * xmlClearParserCtxt:
2427 * @ctxt: an XML parser context
2428 *
2429 * Clear (release owned resources) and reinitialize a parser context
2430 */
2431
2432void
2433xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2434{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002435 if (ctxt==NULL)
2436 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002437 xmlClearNodeInfoSeq(&ctxt->node_seq);
2438 xmlInitParserCtxt(ctxt);
2439}
2440
2441/**
2442 * xmlParserFindNodeInfo:
Daniel Veillard01c13b52002-12-10 15:19:08 +00002443 * @ctx: an XML parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002444 * @node: an XML node within the tree
2445 *
2446 * Find the parser node info struct for a given node
2447 *
2448 * Returns an xmlParserNodeInfo block pointer or NULL
2449 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002450const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2451 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002452{
2453 unsigned long pos;
2454
2455 /* Find position where node should be at */
2456 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002457 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002458 return &ctx->node_seq.buffer[pos];
2459 else
2460 return NULL;
2461}
2462
2463
2464/**
2465 * xmlInitNodeInfoSeq:
2466 * @seq: a node info sequence pointer
2467 *
2468 * -- Initialize (set to initial state) node info sequence
2469 */
2470void
2471xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2472{
2473 seq->length = 0;
2474 seq->maximum = 0;
2475 seq->buffer = NULL;
2476}
2477
2478/**
2479 * xmlClearNodeInfoSeq:
2480 * @seq: a node info sequence pointer
2481 *
2482 * -- Clear (release memory and reinitialize) node
2483 * info sequence
2484 */
2485void
2486xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2487{
2488 if ( seq->buffer != NULL )
2489 xmlFree(seq->buffer);
2490 xmlInitNodeInfoSeq(seq);
2491}
2492
2493
2494/**
2495 * xmlParserFindNodeInfoIndex:
2496 * @seq: a node info sequence pointer
2497 * @node: an XML node pointer
2498 *
2499 *
2500 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2501 * the given node is or should be at in a sorted sequence
2502 *
2503 * Returns a long indicating the position of the record
2504 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002505unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2506 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002507{
2508 unsigned long upper, lower, middle;
2509 int found = 0;
2510
2511 /* Do a binary search for the key */
2512 lower = 1;
2513 upper = seq->length;
2514 middle = 0;
2515 while ( lower <= upper && !found) {
2516 middle = lower + (upper - lower) / 2;
2517 if ( node == seq->buffer[middle - 1].node )
2518 found = 1;
2519 else if ( node < seq->buffer[middle - 1].node )
2520 upper = middle - 1;
2521 else
2522 lower = middle + 1;
2523 }
2524
2525 /* Return position */
2526 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2527 return middle;
2528 else
2529 return middle - 1;
2530}
2531
2532
2533/**
2534 * xmlParserAddNodeInfo:
2535 * @ctxt: an XML parser context
2536 * @info: a node info sequence pointer
2537 *
2538 * Insert node info record into the sorted sequence
2539 */
2540void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002541xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002542 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002543{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002544 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002545
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002546 /* Find pos and check to see if node is already in the sequence */
William M. Brack78637da2003-07-31 14:47:38 +00002547 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002548 info->node);
2549 if (pos < ctxt->node_seq.length
2550 && ctxt->node_seq.buffer[pos].node == info->node) {
2551 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002552 }
2553
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002554 /* Otherwise, we need to add new node to buffer */
2555 else {
2556 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2557 xmlParserNodeInfo *tmp_buffer;
2558 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002559
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002560 if (ctxt->node_seq.maximum == 0)
2561 ctxt->node_seq.maximum = 2;
2562 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2563 (2 * ctxt->node_seq.maximum));
2564
2565 if (ctxt->node_seq.buffer == NULL)
Daniel Veillardc4f65ab2003-04-21 23:07:45 +00002566 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002567 else
2568 tmp_buffer =
2569 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2570 byte_size);
2571
2572 if (tmp_buffer == NULL) {
2573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2574 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2575 ctxt->errNo = XML_ERR_NO_MEMORY;
2576 return;
2577 }
2578 ctxt->node_seq.buffer = tmp_buffer;
2579 ctxt->node_seq.maximum *= 2;
2580 }
2581
2582 /* If position is not at end, move elements out of the way */
2583 if (pos != ctxt->node_seq.length) {
2584 unsigned long i;
2585
2586 for (i = ctxt->node_seq.length; i > pos; i--)
2587 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2588 }
2589
2590 /* Copy element and increase length */
2591 ctxt->node_seq.buffer[pos] = *info;
2592 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002593 }
Owen Taylor3473f882001-02-23 17:55:21 +00002594}
2595
/************************************************************************
 *									*
 *				Default settings			*
 *									*
 ************************************************************************/
2601/**
2602 * xmlPedanticParserDefault:
2603 * @val: int 0 or 1
2604 *
2605 * Set and return the previous value for enabling pedantic warnings.
2606 *
2607 * Returns the last value for 0 for no substitution, 1 for substitution.
2608 */
2609
2610int
2611xmlPedanticParserDefault(int val) {
2612 int old = xmlPedanticParserDefaultValue;
2613
2614 xmlPedanticParserDefaultValue = val;
2615 return(old);
2616}
2617
2618/**
2619 * xmlLineNumbersDefault:
2620 * @val: int 0 or 1
2621 *
2622 * Set and return the previous value for enabling line numbers in elements
2623 * contents. This may break on old application and is turned off by default.
2624 *
2625 * Returns the last value for 0 for no substitution, 1 for substitution.
2626 */
2627
2628int
2629xmlLineNumbersDefault(int val) {
2630 int old = xmlLineNumbersDefaultValue;
2631
2632 xmlLineNumbersDefaultValue = val;
2633 return(old);
2634}
2635
2636/**
2637 * xmlSubstituteEntitiesDefault:
2638 * @val: int 0 or 1
2639 *
2640 * Set and return the previous value for default entity support.
2641 * Initially the parser always keep entity references instead of substituting
2642 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002643 * default parser behavior
2644 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002645 * file basis.
2646 *
2647 * Returns the last value for 0 for no substitution, 1 for substitution.
2648 */
2649
2650int
2651xmlSubstituteEntitiesDefault(int val) {
2652 int old = xmlSubstituteEntitiesDefaultValue;
2653
2654 xmlSubstituteEntitiesDefaultValue = val;
2655 return(old);
2656}
2657
2658/**
2659 * xmlKeepBlanksDefault:
2660 * @val: int 0 or 1
2661 *
2662 * Set and return the previous value for default blanks text nodes support.
2663 * The 1.x version of the parser used an heuristic to try to detect
2664 * ignorable white spaces. As a result the SAX callback was generating
2665 * ignorableWhitespace() callbacks instead of characters() one, and when
2666 * using the DOM output text nodes containing those blanks were not generated.
2667 * The 2.x and later version will switch to the XML standard way and
2668 * ignorableWhitespace() are only generated when running the parser in
2669 * validating mode and when the current element doesn't allow CDATA or
2670 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002671 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002672 * on 1.X libs and to switch back to the old mode for compatibility when
2673 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2674 * by using xmlIsBlankNode() commodity function to detect the "empty"
2675 * nodes generated.
2676 * This value also affect autogeneration of indentation when saving code
2677 * if blanks sections are kept, indentation is not generated.
2678 *
2679 * Returns the last value for 0 for no substitution, 1 for substitution.
2680 */
2681
2682int
2683xmlKeepBlanksDefault(int val) {
2684 int old = xmlKeepBlanksDefaultValue;
2685
2686 xmlKeepBlanksDefaultValue = val;
2687 xmlIndentTreeOutput = !val;
2688 return(old);
2689}
2690
2691/************************************************************************
2692 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002693 * Deprecated functions kept for compatibility *
2694 * *
2695 ************************************************************************/
2696
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002697/**
2698 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002699 * @lang: pointer to the string value
2700 *
2701 * Checks that the value conforms to the LanguageID production:
2702 *
2703 * NOTE: this is somewhat deprecated, those productions were removed from
2704 * the XML Second edition.
2705 *
2706 * [33] LanguageID ::= Langcode ('-' Subcode)*
2707 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2708 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2709 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2710 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2711 * [38] Subcode ::= ([a-z] | [A-Z])+
2712 *
2713 * Returns 1 if correct 0 otherwise
2714 **/
2715int
2716xmlCheckLanguageID(const xmlChar *lang) {
2717 const xmlChar *cur = lang;
2718
2719 if (cur == NULL)
2720 return(0);
2721 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2722 ((cur[0] == 'I') && (cur[1] == '-'))) {
2723 /*
2724 * IANA code
2725 */
2726 cur += 2;
2727 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2728 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2729 cur++;
2730 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2731 ((cur[0] == 'X') && (cur[1] == '-'))) {
2732 /*
2733 * User code
2734 */
2735 cur += 2;
2736 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2737 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2738 cur++;
2739 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2740 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2741 /*
2742 * ISO639
2743 */
2744 cur++;
2745 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2746 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2747 cur++;
2748 else
2749 return(0);
2750 } else
2751 return(0);
2752 while (cur[0] != 0) { /* non input consuming */
2753 if (cur[0] != '-')
2754 return(0);
2755 cur++;
2756 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2757 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2758 cur++;
2759 else
2760 return(0);
2761 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2762 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2763 cur++;
2764 }
2765 return(1);
2766}
2767
2768/**
2769 * xmlDecodeEntities:
2770 * @ctxt: the parser context
Owen Taylor3473f882001-02-23 17:55:21 +00002771 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002772 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
Owen Taylor3473f882001-02-23 17:55:21 +00002773 * @end: an end marker xmlChar, 0 if none
2774 * @end2: an end marker xmlChar, 0 if none
2775 * @end3: an end marker xmlChar, 0 if none
2776 *
2777 * This function is deprecated, we now always process entities content
2778 * through xmlStringDecodeEntities
2779 *
2780 * TODO: remove it in next major release.
2781 *
2782 * [67] Reference ::= EntityRef | CharRef
2783 *
2784 * [69] PEReference ::= '%' Name ';'
2785 *
2786 * Returns A newly allocated string with the substitution done. The caller
2787 * must deallocate it !
2788 */
2789xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002790xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2791 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002792#if 0
2793 xmlChar *buffer = NULL;
2794 unsigned int buffer_size = 0;
2795 unsigned int nbchars = 0;
2796
2797 xmlChar *current = NULL;
2798 xmlEntityPtr ent;
2799 unsigned int max = (unsigned int) len;
2800 int c,l;
2801#endif
2802
2803 static int deprecated = 0;
2804 if (!deprecated) {
2805 xmlGenericError(xmlGenericErrorContext,
2806 "xmlDecodeEntities() deprecated function reached\n");
2807 deprecated = 1;
2808 }
2809
2810#if 0
2811 if (ctxt->depth > 40) {
2812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2813 ctxt->sax->error(ctxt->userData,
2814 "Detected entity reference loop\n");
2815 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002816 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002817 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2818 return(NULL);
2819 }
2820
2821 /*
2822 * allocate a translation buffer.
2823 */
2824 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2825 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2826 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002827 xmlGenericError(xmlGenericErrorContext,
2828 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002829 return(NULL);
2830 }
2831
2832 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002833 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002834 */
2835 GROW;
2836 c = CUR_CHAR(l);
2837 while ((nbchars < max) && (c != end) && /* NOTUSED */
2838 (c != end2) && (c != end3)) {
2839 GROW;
2840 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002841 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002842 int val = xmlParseCharRef(ctxt);
2843 COPY_BUF(0,buffer,nbchars,val);
2844 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002845 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002846 (what & XML_SUBSTITUTE_REF)) {
2847 if (xmlParserDebugEntities)
2848 xmlGenericError(xmlGenericErrorContext,
2849 "decoding Entity Reference\n");
2850 ent = xmlParseEntityRef(ctxt);
2851 if ((ent != NULL) &&
2852 (ctxt->replaceEntities != 0)) {
2853 current = ent->content;
2854 while (*current != 0) { /* non input consuming loop */
2855 buffer[nbchars++] = *current++;
2856 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2857 growBuffer(buffer);
2858 }
2859 }
2860 } else if (ent != NULL) {
2861 const xmlChar *cur = ent->name;
2862
2863 buffer[nbchars++] = '&';
2864 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2865 growBuffer(buffer);
2866 }
2867 while (*cur != 0) { /* non input consuming loop */
2868 buffer[nbchars++] = *cur++;
2869 }
2870 buffer[nbchars++] = ';';
2871 }
2872 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2873 /*
2874 * a PEReference induce to switch the entity flow,
2875 * we break here to flush the current set of chars
2876 * parsed if any. We will be called back later.
2877 */
2878 if (xmlParserDebugEntities)
2879 xmlGenericError(xmlGenericErrorContext,
2880 "decoding PE Reference\n");
2881 if (nbchars != 0) break;
2882
2883 xmlParsePEReference(ctxt);
2884
2885 /*
2886 * Pop-up of finished entities.
2887 */
2888 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2889 xmlPopInput(ctxt);
2890
2891 break;
2892 } else {
2893 COPY_BUF(l,buffer,nbchars,c);
2894 NEXTL(l);
2895 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2896 growBuffer(buffer);
2897 }
2898 }
2899 c = CUR_CHAR(l);
2900 }
2901 buffer[nbchars++] = 0;
2902 return(buffer);
2903#endif
2904 return(NULL);
2905}
2906
2907/**
2908 * xmlNamespaceParseNCName:
2909 * @ctxt: an XML parser context
2910 *
2911 * parse an XML namespace name.
2912 *
2913 * TODO: this seems not in use anymore, the namespace handling is done on
2914 * top of the SAX interfaces, i.e. not on raw input.
2915 *
2916 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2917 *
2918 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2919 * CombiningChar | Extender
2920 *
2921 * Returns the namespace name or NULL
2922 */
2923
2924xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002925xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002926#if 0
2927 xmlChar buf[XML_MAX_NAMELEN + 5];
2928 int len = 0, l;
2929 int cur = CUR_CHAR(l);
2930#endif
2931
2932 static int deprecated = 0;
2933 if (!deprecated) {
2934 xmlGenericError(xmlGenericErrorContext,
2935 "xmlNamespaceParseNCName() deprecated function reached\n");
2936 deprecated = 1;
2937 }
2938
2939#if 0
2940 /* load first the value of the char !!! */
2941 GROW;
2942 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2943
2944xmlGenericError(xmlGenericErrorContext,
2945 "xmlNamespaceParseNCName: reached loop 3\n");
2946 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2947 (cur == '.') || (cur == '-') ||
2948 (cur == '_') ||
2949 (IS_COMBINING(cur)) ||
2950 (IS_EXTENDER(cur))) {
2951 COPY_BUF(l,buf,len,cur);
2952 NEXTL(l);
2953 cur = CUR_CHAR(l);
2954 if (len >= XML_MAX_NAMELEN) {
2955 xmlGenericError(xmlGenericErrorContext,
2956 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2957 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2958 (cur == '.') || (cur == '-') ||
2959 (cur == '_') ||
2960 (IS_COMBINING(cur)) ||
2961 (IS_EXTENDER(cur))) {
2962 NEXTL(l);
2963 cur = CUR_CHAR(l);
2964 }
2965 break;
2966 }
2967 }
2968 return(xmlStrndup(buf, len));
2969#endif
2970 return(NULL);
2971}
2972
2973/**
2974 * xmlNamespaceParseQName:
2975 * @ctxt: an XML parser context
2976 * @prefix: a xmlChar **
2977 *
2978 * TODO: this seems not in use anymore, the namespace handling is done on
2979 * top of the SAX interfaces, i.e. not on raw input.
2980 *
2981 * parse an XML qualified name
2982 *
2983 * [NS 5] QName ::= (Prefix ':')? LocalPart
2984 *
2985 * [NS 6] Prefix ::= NCName
2986 *
2987 * [NS 7] LocalPart ::= NCName
2988 *
2989 * Returns the local part, and prefix is updated
2990 * to get the Prefix if any.
2991 */
2992
2993xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002994xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002995
2996 static int deprecated = 0;
2997 if (!deprecated) {
2998 xmlGenericError(xmlGenericErrorContext,
2999 "xmlNamespaceParseQName() deprecated function reached\n");
3000 deprecated = 1;
3001 }
3002
3003#if 0
3004 xmlChar *ret = NULL;
3005
3006 *prefix = NULL;
3007 ret = xmlNamespaceParseNCName(ctxt);
3008 if (RAW == ':') {
3009 *prefix = ret;
3010 NEXT;
3011 ret = xmlNamespaceParseNCName(ctxt);
3012 }
3013
3014 return(ret);
3015#endif
3016 return(NULL);
3017}
3018
3019/**
3020 * xmlNamespaceParseNSDef:
3021 * @ctxt: an XML parser context
3022 *
3023 * parse a namespace prefix declaration
3024 *
3025 * TODO: this seems not in use anymore, the namespace handling is done on
3026 * top of the SAX interfaces, i.e. not on raw input.
3027 *
3028 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3029 *
3030 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
3031 *
3032 * Returns the namespace name
3033 */
3034
3035xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003036xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003037 static int deprecated = 0;
3038 if (!deprecated) {
3039 xmlGenericError(xmlGenericErrorContext,
3040 "xmlNamespaceParseNSDef() deprecated function reached\n");
3041 deprecated = 1;
3042 }
3043 return(NULL);
3044#if 0
3045 xmlChar *name = NULL;
3046
3047 if ((RAW == 'x') && (NXT(1) == 'm') &&
3048 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3049 (NXT(4) == 's')) {
3050 SKIP(5);
3051 if (RAW == ':') {
3052 NEXT;
3053 name = xmlNamespaceParseNCName(ctxt);
3054 }
3055 }
3056 return(name);
3057#endif
3058}
3059
3060/**
3061 * xmlParseQuotedString:
3062 * @ctxt: an XML parser context
3063 *
3064 * Parse and return a string between quotes or doublequotes
3065 *
3066 * TODO: Deprecated, to be removed at next drop of binary compatibility
3067 *
3068 * Returns the string parser or NULL.
3069 */
3070xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003071xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003072 static int deprecated = 0;
3073 if (!deprecated) {
3074 xmlGenericError(xmlGenericErrorContext,
3075 "xmlParseQuotedString() deprecated function reached\n");
3076 deprecated = 1;
3077 }
3078 return(NULL);
3079
3080#if 0
3081 xmlChar *buf = NULL;
3082 int len = 0,l;
3083 int size = XML_PARSER_BUFFER_SIZE;
3084 int c;
3085
3086 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3087 if (buf == NULL) {
3088 xmlGenericError(xmlGenericErrorContext,
3089 "malloc of %d byte failed\n", size);
3090 return(NULL);
3091 }
3092xmlGenericError(xmlGenericErrorContext,
3093 "xmlParseQuotedString: reached loop 4\n");
3094 if (RAW == '"') {
3095 NEXT;
3096 c = CUR_CHAR(l);
3097 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3098 if (len + 5 >= size) {
3099 size *= 2;
3100 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3101 if (buf == NULL) {
3102 xmlGenericError(xmlGenericErrorContext,
3103 "realloc of %d byte failed\n", size);
3104 return(NULL);
3105 }
3106 }
3107 COPY_BUF(l,buf,len,c);
3108 NEXTL(l);
3109 c = CUR_CHAR(l);
3110 }
3111 if (c != '"') {
3112 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114 ctxt->sax->error(ctxt->userData,
3115 "String not closed \"%.50s\"\n", buf);
3116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 } else {
3119 NEXT;
3120 }
3121 } else if (RAW == '\''){
3122 NEXT;
3123 c = CUR;
3124 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3125 if (len + 1 >= size) {
3126 size *= 2;
3127 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3128 if (buf == NULL) {
3129 xmlGenericError(xmlGenericErrorContext,
3130 "realloc of %d byte failed\n", size);
3131 return(NULL);
3132 }
3133 }
3134 buf[len++] = c;
3135 NEXT;
3136 c = CUR;
3137 }
3138 if (RAW != '\'') {
3139 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142 "String not closed \"%.50s\"\n", buf);
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 } else {
3146 NEXT;
3147 }
3148 }
3149 return(buf);
3150#endif
3151}
3152
3153/**
3154 * xmlParseNamespace:
3155 * @ctxt: an XML parser context
3156 *
3157 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3158 *
3159 * This is what the older xml-name Working Draft specified, a bunch of
3160 * other stuff may still rely on it, so support is still here as
3161 * if it was declared on the root of the Tree:-(
3162 *
3163 * TODO: remove from library
3164 *
3165 * To be removed at next drop of binary compatibility
3166 */
3167
3168void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003169xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003170 static int deprecated = 0;
3171 if (!deprecated) {
3172 xmlGenericError(xmlGenericErrorContext,
3173 "xmlParseNamespace() deprecated function reached\n");
3174 deprecated = 1;
3175 }
3176
3177#if 0
3178 xmlChar *href = NULL;
3179 xmlChar *prefix = NULL;
3180 int garbage = 0;
3181
3182 /*
3183 * We just skipped "namespace" or "xml:namespace"
3184 */
3185 SKIP_BLANKS;
3186
3187xmlGenericError(xmlGenericErrorContext,
3188 "xmlParseNamespace: reached loop 5\n");
3189 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3190 /*
3191 * We can have "ns" or "prefix" attributes
3192 * Old encoding as 'href' or 'AS' attributes is still supported
3193 */
3194 if ((RAW == 'n') && (NXT(1) == 's')) {
3195 garbage = 0;
3196 SKIP(2);
3197 SKIP_BLANKS;
3198
3199 if (RAW != '=') continue;
3200 NEXT;
3201 SKIP_BLANKS;
3202
3203 href = xmlParseQuotedString(ctxt);
3204 SKIP_BLANKS;
3205 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3206 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3207 garbage = 0;
3208 SKIP(4);
3209 SKIP_BLANKS;
3210
3211 if (RAW != '=') continue;
3212 NEXT;
3213 SKIP_BLANKS;
3214
3215 href = xmlParseQuotedString(ctxt);
3216 SKIP_BLANKS;
3217 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3218 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3219 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3220 garbage = 0;
3221 SKIP(6);
3222 SKIP_BLANKS;
3223
3224 if (RAW != '=') continue;
3225 NEXT;
3226 SKIP_BLANKS;
3227
3228 prefix = xmlParseQuotedString(ctxt);
3229 SKIP_BLANKS;
3230 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3231 garbage = 0;
3232 SKIP(2);
3233 SKIP_BLANKS;
3234
3235 if (RAW != '=') continue;
3236 NEXT;
3237 SKIP_BLANKS;
3238
3239 prefix = xmlParseQuotedString(ctxt);
3240 SKIP_BLANKS;
3241 } else if ((RAW == '?') && (NXT(1) == '>')) {
3242 garbage = 0;
3243 NEXT;
3244 } else {
3245 /*
3246 * Found garbage when parsing the namespace
3247 */
3248 if (!garbage) {
3249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3250 ctxt->sax->error(ctxt->userData,
3251 "xmlParseNamespace found garbage\n");
3252 }
3253 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3254 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003255 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003256 NEXT;
3257 }
3258 }
3259
3260 MOVETO_ENDTAG(CUR_PTR);
3261 NEXT;
3262
3263 /*
3264 * Register the DTD.
3265 if (href != NULL)
3266 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3267 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3268 */
3269
3270 if (prefix != NULL) xmlFree(prefix);
3271 if (href != NULL) xmlFree(href);
3272#endif
3273}
3274
3275/**
3276 * xmlScanName:
3277 * @ctxt: an XML parser context
3278 *
3279 * Trickery: parse an XML name but without consuming the input flow
3280 * Needed for rollback cases. Used only when parsing entities references.
3281 *
3282 * TODO: seems deprecated now, only used in the default part of
3283 * xmlParserHandleReference
3284 *
3285 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3286 * CombiningChar | Extender
3287 *
3288 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3289 *
3290 * [6] Names ::= Name (S Name)*
3291 *
3292 * Returns the Name parsed or NULL
3293 */
3294
3295xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003296xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003297 static int deprecated = 0;
3298 if (!deprecated) {
3299 xmlGenericError(xmlGenericErrorContext,
3300 "xmlScanName() deprecated function reached\n");
3301 deprecated = 1;
3302 }
3303 return(NULL);
3304
3305#if 0
3306 xmlChar buf[XML_MAX_NAMELEN];
3307 int len = 0;
3308
3309 GROW;
3310 if (!IS_LETTER(RAW) && (RAW != '_') &&
3311 (RAW != ':')) {
3312 return(NULL);
3313 }
3314
3315
3316 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3317 (NXT(len) == '.') || (NXT(len) == '-') ||
3318 (NXT(len) == '_') || (NXT(len) == ':') ||
3319 (IS_COMBINING(NXT(len))) ||
3320 (IS_EXTENDER(NXT(len)))) {
3321 GROW;
3322 buf[len] = NXT(len);
3323 len++;
3324 if (len >= XML_MAX_NAMELEN) {
3325 xmlGenericError(xmlGenericErrorContext,
3326 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3327 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3328 (IS_DIGIT(NXT(len))) ||
3329 (NXT(len) == '.') || (NXT(len) == '-') ||
3330 (NXT(len) == '_') || (NXT(len) == ':') ||
3331 (IS_COMBINING(NXT(len))) ||
3332 (IS_EXTENDER(NXT(len))))
3333 len++;
3334 break;
3335 }
3336 }
3337 return(xmlStrndup(buf, len));
3338#endif
3339}
3340
3341/**
3342 * xmlParserHandleReference:
3343 * @ctxt: the parser context
3344 *
3345 * TODO: Remove, now deprecated ... the test is done directly in the
3346 * content parsing
3347 * routines.
3348 *
3349 * [67] Reference ::= EntityRef | CharRef
3350 *
3351 * [68] EntityRef ::= '&' Name ';'
3352 *
3353 * [ WFC: Entity Declared ]
3354 * the Name given in the entity reference must match that in an entity
3355 * declaration, except that well-formed documents need not declare any
3356 * of the following entities: amp, lt, gt, apos, quot.
3357 *
3358 * [ WFC: Parsed Entity ]
3359 * An entity reference must not contain the name of an unparsed entity
3360 *
3361 * [66] CharRef ::= '&#' [0-9]+ ';' |
3362 * '&#x' [0-9a-fA-F]+ ';'
3363 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003364 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003365 * the handling is done accordingly to
3366 * http://www.w3.org/TR/REC-xml#entproc
3367 */
3368void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003369xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003370 static int deprecated = 0;
3371 if (!deprecated) {
3372 xmlGenericError(xmlGenericErrorContext,
3373 "xmlParserHandleReference() deprecated function reached\n");
3374 deprecated = 1;
3375 }
3376
Owen Taylor3473f882001-02-23 17:55:21 +00003377 return;
3378}
3379
3380/**
3381 * xmlHandleEntity:
3382 * @ctxt: an XML parser context
3383 * @entity: an XML entity pointer.
3384 *
3385 * Default handling of defined entities, when should we define a new input
3386 * stream ? When do we just handle that as a set of chars ?
3387 *
3388 * OBSOLETE: to be removed at some point.
3389 */
3390
3391void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003392xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003393 static int deprecated = 0;
3394 if (!deprecated) {
3395 xmlGenericError(xmlGenericErrorContext,
3396 "xmlHandleEntity() deprecated function reached\n");
3397 deprecated = 1;
3398 }
3399
3400#if 0
3401 int len;
3402 xmlParserInputPtr input;
3403
3404 if (entity->content == NULL) {
3405 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3407 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3408 entity->name);
3409 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003410 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003411 return;
3412 }
3413 len = xmlStrlen(entity->content);
3414 if (len <= 2) goto handle_as_char;
3415
3416 /*
3417 * Redefine its content as an input stream.
3418 */
3419 input = xmlNewEntityInputStream(ctxt, entity);
3420 xmlPushInput(ctxt, input);
3421 return;
3422
3423handle_as_char:
3424 /*
3425 * Just handle the content as a set of chars.
3426 */
3427 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3428 (ctxt->sax->characters != NULL))
3429 ctxt->sax->characters(ctxt->userData, entity->content, len);
3430#endif
3431}
3432
3433/**
3434 * xmlNewGlobalNs:
3435 * @doc: the document carrying the namespace
3436 * @href: the URI associated
3437 * @prefix: the prefix for the namespace
3438 *
3439 * Creation of a Namespace, the old way using PI and without scoping
3440 * DEPRECATED !!!
3441 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003442 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003443 */
3444xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003445xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3446 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003447 static int deprecated = 0;
3448 if (!deprecated) {
3449 xmlGenericError(xmlGenericErrorContext,
3450 "xmlNewGlobalNs() deprecated function reached\n");
3451 deprecated = 1;
3452 }
3453 return(NULL);
3454#if 0
3455 xmlNodePtr root;
3456
3457 xmlNsPtr cur;
3458
3459 root = xmlDocGetRootElement(doc);
3460 if (root != NULL)
3461 return(xmlNewNs(root, href, prefix));
3462
3463 /*
3464 * if there is no root element yet, create an old Namespace type
3465 * and it will be moved to the root at save time.
3466 */
3467 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3468 if (cur == NULL) {
3469 xmlGenericError(xmlGenericErrorContext,
3470 "xmlNewGlobalNs : malloc failed\n");
3471 return(NULL);
3472 }
3473 memset(cur, 0, sizeof(xmlNs));
3474 cur->type = XML_GLOBAL_NAMESPACE;
3475
3476 if (href != NULL)
3477 cur->href = xmlStrdup(href);
3478 if (prefix != NULL)
3479 cur->prefix = xmlStrdup(prefix);
3480
3481 /*
3482 * Add it at the end to preserve parsing order ...
3483 */
3484 if (doc != NULL) {
3485 if (doc->oldNs == NULL) {
3486 doc->oldNs = cur;
3487 } else {
3488 xmlNsPtr prev = doc->oldNs;
3489
3490 while (prev->next != NULL) prev = prev->next;
3491 prev->next = cur;
3492 }
3493 }
3494
3495 return(NULL);
3496#endif
3497}
3498
3499/**
3500 * xmlUpgradeOldNs:
3501 * @doc: a document pointer
3502 *
3503 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3504 * DEPRECATED
3505 */
3506void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003507xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003508 static int deprecated = 0;
3509 if (!deprecated) {
3510 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003511 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003512 deprecated = 1;
3513 }
3514#if 0
3515 xmlNsPtr cur;
3516
3517 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3518 if (doc->children == NULL) {
3519#ifdef DEBUG_TREE
3520 xmlGenericError(xmlGenericErrorContext,
3521 "xmlUpgradeOldNs: failed no root !\n");
3522#endif
3523 return;
3524 }
3525
3526 cur = doc->oldNs;
3527 while (cur->next != NULL) {
3528 cur->type = XML_LOCAL_NAMESPACE;
3529 cur = cur->next;
3530 }
3531 cur->type = XML_LOCAL_NAMESPACE;
3532 cur->next = doc->children->nsDef;
3533 doc->children->nsDef = doc->oldNs;
3534 doc->oldNs = NULL;
3535#endif
3536}
3537