blob: c539714def69a634c2372ee381495c7e609c23b4 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Daniel Veillard3c5ed912002-01-08 10:36:16 +000012#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000053#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000054
Daniel Veillard56a4cb82001-03-24 17:00:36 +000055void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000056
/*
 * Various global defaults for parsing
 *
 * On VMS builds the two defaults below are defined under shortened
 * identifiers, and the full-length names are mapped onto them with macros
 * so the rest of this file can keep using the long spelling.
 * NOTE(review): presumably this works around a symbol-length limit of the
 * VMS toolchain - confirm before changing.
 */
#ifdef VMS
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066
Daniel Veillard5e2dace2001-07-18 19:30:27 +000067/**
Owen Taylor3473f882001-02-23 17:55:21 +000068 * xmlCheckVersion:
69 * @version: the include version number
70 *
71 * check the compiled lib version against the include one.
72 * This can warn or immediately kill the application
73 */
74void
75xmlCheckVersion(int version) {
76 int myversion = (int) LIBXML_VERSION;
77
Daniel Veillard6f350292001-10-14 09:56:15 +000078 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000079
Owen Taylor3473f882001-02-23 17:55:21 +000080 if ((myversion / 10000) != (version / 10000)) {
81 xmlGenericError(xmlGenericErrorContext,
82 "Fatal: program compiled against libxml %d using libxml %d\n",
83 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000084 fprintf(stderr,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000087 }
88 if ((myversion / 100) < (version / 100)) {
89 xmlGenericError(xmlGenericErrorContext,
90 "Warning: program compiled against libxml %d using older %d\n",
91 (version / 100), (myversion / 100));
92 }
93}
94
95
/*
 * Names of the parser features exposed through the feature API.
 * The strings are static and must never be freed by callers.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * If *@len is larger than the number of known features it is lowered to
 * that number; otherwise it is left untouched.
 *
 * Returns the total number of features; this is also what is returned,
 *         without copying anything, when @len or @result is NULL.
 *         Returns -1 if *@len is negative or not below 1000.
 *         The strings returned must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx = 0;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        /* tell the caller how many entries are really available */
        wanted = total;
        *len = total;
    }

    while (idx < wanted) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
167
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000168/**
Owen Taylor3473f882001-02-23 17:55:21 +0000169 * xmlGetFeature:
170 * @ctxt: an XML/HTML parser context
171 * @name: the feature name
172 * @result: location to store the result
173 *
174 * Read the current value of one feature of this parser instance
175 *
176 * Returns -1 in case or error, 0 otherwise
177 */
178int
179xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181 return(-1);
182
183 if (!strcmp(name, "validate")) {
184 *((int *) result) = ctxt->validate;
185 } else if (!strcmp(name, "keep blanks")) {
186 *((int *) result) = ctxt->keepBlanks;
187 } else if (!strcmp(name, "disable SAX")) {
188 *((int *) result) = ctxt->disableSAX;
189 } else if (!strcmp(name, "fetch external entities")) {
190 *((int *) result) = ctxt->loadsubset;
191 } else if (!strcmp(name, "substitute entities")) {
192 *((int *) result) = ctxt->replaceEntities;
193 } else if (!strcmp(name, "gather line info")) {
194 *((int *) result) = ctxt->record_info;
195 } else if (!strcmp(name, "user data")) {
196 *((void **)result) = ctxt->userData;
197 } else if (!strcmp(name, "is html")) {
198 *((int *) result) = ctxt->html;
199 } else if (!strcmp(name, "is standalone")) {
200 *((int *) result) = ctxt->standalone;
201 } else if (!strcmp(name, "document")) {
202 *((xmlDocPtr *) result) = ctxt->myDoc;
203 } else if (!strcmp(name, "is well formed")) {
204 *((int *) result) = ctxt->wellFormed;
205 } else if (!strcmp(name, "is valid")) {
206 *((int *) result) = ctxt->valid;
207 } else if (!strcmp(name, "SAX block")) {
208 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209 } else if (!strcmp(name, "SAX function internalSubset")) {
210 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211 } else if (!strcmp(name, "SAX function isStandalone")) {
212 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217 } else if (!strcmp(name, "SAX function resolveEntity")) {
218 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219 } else if (!strcmp(name, "SAX function getEntity")) {
220 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221 } else if (!strcmp(name, "SAX function entityDecl")) {
222 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223 } else if (!strcmp(name, "SAX function notationDecl")) {
224 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225 } else if (!strcmp(name, "SAX function attributeDecl")) {
226 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227 } else if (!strcmp(name, "SAX function elementDecl")) {
228 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233 } else if (!strcmp(name, "SAX function startDocument")) {
234 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235 } else if (!strcmp(name, "SAX function endDocument")) {
236 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237 } else if (!strcmp(name, "SAX function startElement")) {
238 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239 } else if (!strcmp(name, "SAX function endElement")) {
240 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241 } else if (!strcmp(name, "SAX function reference")) {
242 *((referenceSAXFunc *) result) = ctxt->sax->reference;
243 } else if (!strcmp(name, "SAX function characters")) {
244 *((charactersSAXFunc *) result) = ctxt->sax->characters;
245 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247 } else if (!strcmp(name, "SAX function processingInstruction")) {
248 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249 } else if (!strcmp(name, "SAX function comment")) {
250 *((commentSAXFunc *) result) = ctxt->sax->comment;
251 } else if (!strcmp(name, "SAX function warning")) {
252 *((warningSAXFunc *) result) = ctxt->sax->warning;
253 } else if (!strcmp(name, "SAX function error")) {
254 *((errorSAXFunc *) result) = ctxt->sax->error;
255 } else if (!strcmp(name, "SAX function fatalError")) {
256 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257 } else if (!strcmp(name, "SAX function getParameterEntity")) {
258 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259 } else if (!strcmp(name, "SAX function cdataBlock")) {
260 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261 } else if (!strcmp(name, "SAX function externalSubset")) {
262 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263 } else {
264 return(-1);
265 }
266 return(0);
267}
268
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000269/**
Owen Taylor3473f882001-02-23 17:55:21 +0000270 * xmlSetFeature:
271 * @ctxt: an XML/HTML parser context
272 * @name: the feature name
273 * @value: pointer to the location of the new value
274 *
275 * Change the current value of one feature of this parser instance
276 *
277 * Returns -1 in case or error, 0 otherwise
278 */
279int
280xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282 return(-1);
283
284 if (!strcmp(name, "validate")) {
285 int newvalidate = *((int *) value);
286 if ((!ctxt->validate) && (newvalidate != 0)) {
287 if (ctxt->vctxt.warning == NULL)
288 ctxt->vctxt.warning = xmlParserValidityWarning;
289 if (ctxt->vctxt.error == NULL)
290 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000291 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000292 }
293 ctxt->validate = newvalidate;
294 } else if (!strcmp(name, "keep blanks")) {
295 ctxt->keepBlanks = *((int *) value);
296 } else if (!strcmp(name, "disable SAX")) {
297 ctxt->disableSAX = *((int *) value);
298 } else if (!strcmp(name, "fetch external entities")) {
299 ctxt->loadsubset = *((int *) value);
300 } else if (!strcmp(name, "substitute entities")) {
301 ctxt->replaceEntities = *((int *) value);
302 } else if (!strcmp(name, "gather line info")) {
303 ctxt->record_info = *((int *) value);
304 } else if (!strcmp(name, "user data")) {
305 ctxt->userData = *((void **)value);
306 } else if (!strcmp(name, "is html")) {
307 ctxt->html = *((int *) value);
308 } else if (!strcmp(name, "is standalone")) {
309 ctxt->standalone = *((int *) value);
310 } else if (!strcmp(name, "document")) {
311 ctxt->myDoc = *((xmlDocPtr *) value);
312 } else if (!strcmp(name, "is well formed")) {
313 ctxt->wellFormed = *((int *) value);
314 } else if (!strcmp(name, "is valid")) {
315 ctxt->valid = *((int *) value);
316 } else if (!strcmp(name, "SAX block")) {
317 ctxt->sax = *((xmlSAXHandlerPtr *) value);
318 } else if (!strcmp(name, "SAX function internalSubset")) {
319 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function isStandalone")) {
321 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
323 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
325 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function resolveEntity")) {
327 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function getEntity")) {
329 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
330 } else if (!strcmp(name, "SAX function entityDecl")) {
331 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function notationDecl")) {
333 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function attributeDecl")) {
335 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function elementDecl")) {
337 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
339 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
341 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startDocument")) {
343 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endDocument")) {
345 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function startElement")) {
347 ctxt->sax->startElement = *((startElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function endElement")) {
349 ctxt->sax->endElement = *((endElementSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function reference")) {
351 ctxt->sax->reference = *((referenceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function characters")) {
353 ctxt->sax->characters = *((charactersSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
355 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function processingInstruction")) {
357 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function comment")) {
359 ctxt->sax->comment = *((commentSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function warning")) {
361 ctxt->sax->warning = *((warningSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function error")) {
363 ctxt->sax->error = *((errorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function fatalError")) {
365 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function getParameterEntity")) {
367 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
368 } else if (!strcmp(name, "SAX function cdataBlock")) {
369 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
370 } else if (!strcmp(name, "SAX function externalSubset")) {
371 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
372 } else {
373 return(-1);
374 }
375 return(0);
376}
377
378/************************************************************************
379 * *
380 * Some functions to avoid too large macros *
381 * *
382 ************************************************************************/
383
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* rejects FFFE/FFFF and anything past the last Unicode plane */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
404
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20: /* space */
        case 0x09: /* tab */
        case 0x0A: /* line feed */
        case 0x0D: /* carriage return */
            return(1);
        default:
            return(0);
    }
}
419
/*
 * Direct lookup table for the first 256 code points: non-zero when the
 * code point is a BaseChar.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [first, last] BaseChar ranges for code points >= 0x0100.
 * The table MUST stay sorted and non-overlapping: it is binary searched.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; the others
 * are looked up in the sorted xmlBaseCharRanges table. Negative values
 * are rejected up front so they can never be used as an index into
 * xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
658
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] digit ranges, in increasing order */
    static const int digitRanges[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int count = (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        /* ranges are sorted: nothing further can match */
        if (c < digitRanges[idx][0])
            break;
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
688
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * Inclusive [first, last] ranges, sorted and non-overlapping so
     * that they can be binary searched.
     */
    static const int combRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C},
        {0x093E, 0x094C}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4},
        {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
        {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C},
        {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5},
        {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
        {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
        {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03},
        {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},
        {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
        {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
        {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97},
        {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F},
        {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    int low = 0;
    int high = (int) (sizeof(combRanges) / sizeof(combRanges[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < combRanges[mid][0])
            high = mid - 1;
        else if (c > combRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
801
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Test whether @c belongs to the Extender character class:
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x309D) && (c <= 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* the isolated code points */
    return((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
           (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
           (c == 0x0EC6) || (c == 0x3005));
}
826
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; the
 * CJK compatibility area (#xF900 and above) is not part of it and is
 * therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick exit: nothing below #x0100 (nor a negative value) matches */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
844
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Test whether @c is accepted by the production
 * [84] Letter ::= BaseChar | Ideographic
 * by delegating to the IS_BASECHAR and IS_IDEOGRAPHIC checks.
 *
 * Returns 1 if @c is a letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tried first, exactly as the production lists it */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
858
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Test whether @c may appear in a public identifier, i.e. is accepted by
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* alphanumeric ranges of the production */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the allowed white space and punctuation, one label each */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
881
882/************************************************************************
883 * *
884 * Input handling functions for progressive parsing *
885 * *
886 ************************************************************************/
887
888/* #define DEBUG_INPUT */
889/* #define DEBUG_STACK */
890/* #define DEBUG_PUSH */
891
892
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  the parser input to inspect
 *
 * Debugging aid, only compiled when DEBUG_INPUT is defined.  Reports
 * through xmlGenericError() when the base pointer of @in no longer
 * matches the content of its buffer or when the cursor lies outside
 * [base, base + use], then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the cursor offset is narrowed
     * explicitly, so every argument matches its conversion specifier
     * even where pointers are wider than int.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
920
921
922/**
923 * xmlParserInputRead:
924 * @in: an XML parser input
925 * @len: an indicative size for the lookahead
926 *
927 * This function refresh the input for the parser. It doesn't try to
928 * preserve pointers to the input buffer, and discard already read data
929 *
930 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
931 * end of this entity
932 */
933int
934xmlParserInputRead(xmlParserInputPtr in, int len) {
935 int ret;
936 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000937 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000938
939#ifdef DEBUG_INPUT
940 xmlGenericError(xmlGenericErrorContext, "Read\n");
941#endif
942 if (in->buf == NULL) return(-1);
943 if (in->base == NULL) return(-1);
944 if (in->cur == NULL) return(-1);
945 if (in->buf->buffer == NULL) return(-1);
946 if (in->buf->readcallback == NULL) return(-1);
947
948 CHECK_BUFFER(in);
949
950 used = in->cur - in->buf->buffer->content;
951 ret = xmlBufferShrink(in->buf->buffer, used);
952 if (ret > 0) {
953 in->cur -= ret;
954 in->consumed += ret;
955 }
956 ret = xmlParserInputBufferRead(in->buf, len);
957 if (in->base != in->buf->buffer->content) {
958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000959 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000960 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000961 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000962 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000963 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000964 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000965 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000966
967 CHECK_BUFFER(in);
968
969 return(ret);
970}
971
972/**
973 * xmlParserInputGrow:
974 * @in: an XML parser input
975 * @len: an indicative size for the lookahead
976 *
977 * This function increase the input for the parser. It tries to
978 * preserve pointers to the input buffer, and keep already read data
979 *
980 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
981 * end of this entity
982 */
983int
984xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000986 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000987
988#ifdef DEBUG_INPUT
989 xmlGenericError(xmlGenericErrorContext, "Grow\n");
990#endif
991 if (in->buf == NULL) return(-1);
992 if (in->base == NULL) return(-1);
993 if (in->cur == NULL) return(-1);
994 if (in->buf->buffer == NULL) return(-1);
995
996 CHECK_BUFFER(in);
997
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000998 indx = in->cur - in->base;
999 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001000
1001 CHECK_BUFFER(in);
1002
1003 return(0);
1004 }
1005 if (in->buf->readcallback != NULL)
1006 ret = xmlParserInputBufferGrow(in->buf, len);
1007 else
1008 return(0);
1009
1010 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001011 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001012 * block, but we use it really as an integer to do some
1013 * pointer arithmetic. Insure will raise it as a bug but in
1014 * that specific case, that's not !
1015 */
1016 if (in->base != in->buf->buffer->content) {
1017 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001018 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001019 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001020 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001021 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001022 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001023 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001024 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001025
1026 CHECK_BUFFER(in);
1027
1028 return(ret);
1029}
1030
1031/**
1032 * xmlParserInputShrink:
1033 * @in: an XML parser input
1034 *
1035 * This function removes used input for the parser.
1036 */
1037void
1038xmlParserInputShrink(xmlParserInputPtr in) {
1039 int used;
1040 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001041 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001042
1043#ifdef DEBUG_INPUT
1044 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1045#endif
1046 if (in->buf == NULL) return;
1047 if (in->base == NULL) return;
1048 if (in->cur == NULL) return;
1049 if (in->buf->buffer == NULL) return;
1050
1051 CHECK_BUFFER(in);
1052
1053 used = in->cur - in->buf->buffer->content;
1054 /*
1055 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001056 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001057 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001058 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001059 return;
1060 if (used > INPUT_CHUNK) {
1061 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1062 if (ret > 0) {
1063 in->cur -= ret;
1064 in->consumed += ret;
1065 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001066 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001067 }
1068
1069 CHECK_BUFFER(in);
1070
1071 if (in->buf->buffer->use > INPUT_CHUNK) {
1072 return;
1073 }
1074 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1075 if (in->base != in->buf->buffer->content) {
1076 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001077 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001080 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001083 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001084
1085 CHECK_BUFFER(in);
1086}
1087
1088/************************************************************************
1089 * *
1090 * UTF8 character input and related functions *
1091 * *
1092 ************************************************************************/
1093
1094/**
1095 * xmlNextChar:
1096 * @ctxt: the XML parser context
1097 *
1098 * Skip to the next char input char.
1099 */
1100
1101void
1102xmlNextChar(xmlParserCtxtPtr ctxt) {
1103 if (ctxt->instate == XML_PARSER_EOF)
1104 return;
1105
1106 /*
1107 * 2.11 End-of-Line Handling
1108 * the literal two-character sequence "#xD#xA" or a standalone
1109 * literal #xD, an XML processor must pass to the application
1110 * the single character #xA.
1111 */
1112 if (ctxt->token != 0) ctxt->token = 0;
1113 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1114 if ((*ctxt->input->cur == 0) &&
1115 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1116 (ctxt->instate != XML_PARSER_COMMENT)) {
1117 /*
1118 * If we are at the end of the current entity and
1119 * the context allows it, we pop consumed entities
1120 * automatically.
1121 * the auto closing should be blocked in other cases
1122 */
1123 xmlPopInput(ctxt);
1124 } else {
1125 if (*(ctxt->input->cur) == '\n') {
1126 ctxt->input->line++; ctxt->input->col = 1;
1127 } else ctxt->input->col++;
1128 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 const unsigned char *cur = ctxt->input->cur;
1141 unsigned char c;
1142
1143 c = *cur;
1144 if (c & 0x80) {
1145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
1152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
1155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
1157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
1160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n", val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 ctxt->disableSAX = 1;
1185 }
1186 } else
1187 /* 2-byte code */
1188 ctxt->input->cur += 2;
1189 } else
1190 /* 1-byte code */
1191 ctxt->input->cur++;
1192 } else {
1193 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001194 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001195 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001196 * XML constructs only use < 128 chars
1197 */
1198 ctxt->input->cur++;
1199 }
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 } else {
1205 ctxt->input->cur++;
1206 ctxt->nbChars++;
1207 if (*ctxt->input->cur == 0)
1208 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1209 }
1210 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1211 xmlParserHandlePEReference(ctxt);
1212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
1215 return;
1216encoding_error:
1217 /*
1218 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001219 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001220 * declaration header. Report the error and switch the encoding
1221 * to ISO-Latin-1 (if you don't like this policy, just declare the
1222 * encoding !)
1223 */
1224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1225 ctxt->sax->error(ctxt->userData,
1226 "Input is not proper UTF-8, indicate encoding !\n");
1227 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1228 ctxt->input->cur[0], ctxt->input->cur[1],
1229 ctxt->input->cur[2], ctxt->input->cur[3]);
1230 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001231 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001232 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1233
1234 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1235 ctxt->input->cur++;
1236 return;
1237}
1238
1239/**
1240 * xmlCurrentChar:
1241 * @ctxt: the XML parser context
1242 * @len: pointer to the length of the char read
1243 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001244 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001245 * bytes in the input buffer. Implement the end of line normalization:
1246 * 2.11 End-of-Line Handling
1247 * Wherever an external parsed entity or the literal entity value
1248 * of an internal parsed entity contains either the literal two-character
1249 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1250 * must pass to the application the single character #xA.
1251 * This behavior can conveniently be produced by normalizing all
1252 * line breaks to #xA on input, before parsing.)
1253 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001254 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001255 */
1256
1257int
1258xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1259 if (ctxt->instate == XML_PARSER_EOF)
1260 return(0);
1261
1262 if (ctxt->token != 0) {
1263 *len = 0;
1264 return(ctxt->token);
1265 }
1266 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1267 *len = 1;
1268 return((int) *ctxt->input->cur);
1269 }
1270 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1271 /*
1272 * We are supposed to handle UTF8, check it's valid
1273 * From rfc2044: encoding of the Unicode values on UTF-8:
1274 *
1275 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1276 * 0000 0000-0000 007F 0xxxxxxx
1277 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1278 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1279 *
1280 * Check for the 0x110000 limit too
1281 */
1282 const unsigned char *cur = ctxt->input->cur;
1283 unsigned char c;
1284 unsigned int val;
1285
1286 c = *cur;
1287 if (c & 0x80) {
1288 if (cur[1] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if ((cur[1] & 0xc0) != 0x80)
1291 goto encoding_error;
1292 if ((c & 0xe0) == 0xe0) {
1293
1294 if (cur[2] == 0)
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 if ((cur[2] & 0xc0) != 0x80)
1297 goto encoding_error;
1298 if ((c & 0xf0) == 0xf0) {
1299 if (cur[3] == 0)
1300 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1301 if (((c & 0xf8) != 0xf0) ||
1302 ((cur[3] & 0xc0) != 0x80))
1303 goto encoding_error;
1304 /* 4-byte code */
1305 *len = 4;
1306 val = (cur[0] & 0x7) << 18;
1307 val |= (cur[1] & 0x3f) << 12;
1308 val |= (cur[2] & 0x3f) << 6;
1309 val |= cur[3] & 0x3f;
1310 } else {
1311 /* 3-byte code */
1312 *len = 3;
1313 val = (cur[0] & 0xf) << 12;
1314 val |= (cur[1] & 0x3f) << 6;
1315 val |= cur[2] & 0x3f;
1316 }
1317 } else {
1318 /* 2-byte code */
1319 *len = 2;
1320 val = (cur[0] & 0x1f) << 6;
1321 val |= cur[1] & 0x3f;
1322 }
1323 if (!IS_CHAR(val)) {
1324 if ((ctxt->sax != NULL) &&
1325 (ctxt->sax->error != NULL))
1326 ctxt->sax->error(ctxt->userData,
1327 "Char 0x%X out of allowed range\n", val);
1328 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1329 ctxt->wellFormed = 0;
1330 ctxt->disableSAX = 1;
1331 }
1332 return(val);
1333 } else {
1334 /* 1-byte code */
1335 *len = 1;
1336 if (*ctxt->input->cur == 0xD) {
1337 if (ctxt->input->cur[1] == 0xA) {
1338 ctxt->nbChars++;
1339 ctxt->input->cur++;
1340 }
1341 return(0xA);
1342 }
1343 return((int) *ctxt->input->cur);
1344 }
1345 }
1346 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001347 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001348 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001349 * XML constructs only use < 128 chars
1350 */
1351 *len = 1;
1352 if (*ctxt->input->cur == 0xD) {
1353 if (ctxt->input->cur[1] == 0xA) {
1354 ctxt->nbChars++;
1355 ctxt->input->cur++;
1356 }
1357 return(0xA);
1358 }
1359 return((int) *ctxt->input->cur);
1360encoding_error:
1361 /*
1362 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001363 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001364 * declaration header. Report the error and switch the encoding
1365 * to ISO-Latin-1 (if you don't like this policy, just declare the
1366 * encoding !)
1367 */
1368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1369 ctxt->sax->error(ctxt->userData,
1370 "Input is not proper UTF-8, indicate encoding !\n");
1371 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1372 ctxt->input->cur[0], ctxt->input->cur[1],
1373 ctxt->input->cur[2], ctxt->input->cur[3]);
1374 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001375 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001376 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1377
1378 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1379 *len = 1;
1380 return((int) *ctxt->input->cur);
1381}
1382
1383/**
1384 * xmlStringCurrentChar:
1385 * @ctxt: the XML parser context
1386 * @cur: pointer to the beginning of the char
1387 * @len: pointer to the length of the char read
1388 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001389 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001390 * bytes in the input buffer.
1391 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001392 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001393 */
1394
1395int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001396xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1397{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001398 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001399 /*
1400 * We are supposed to handle UTF8, check it's valid
1401 * From rfc2044: encoding of the Unicode values on UTF-8:
1402 *
1403 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1404 * 0000 0000-0000 007F 0xxxxxxx
1405 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1406 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1407 *
1408 * Check for the 0x110000 limit too
1409 */
1410 unsigned char c;
1411 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001412
Daniel Veillardd8224e02002-01-13 15:43:22 +00001413 c = *cur;
1414 if (c & 0x80) {
1415 if ((cur[1] & 0xc0) != 0x80)
1416 goto encoding_error;
1417 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001418
Daniel Veillardd8224e02002-01-13 15:43:22 +00001419 if ((cur[2] & 0xc0) != 0x80)
1420 goto encoding_error;
1421 if ((c & 0xf0) == 0xf0) {
1422 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1423 goto encoding_error;
1424 /* 4-byte code */
1425 *len = 4;
1426 val = (cur[0] & 0x7) << 18;
1427 val |= (cur[1] & 0x3f) << 12;
1428 val |= (cur[2] & 0x3f) << 6;
1429 val |= cur[3] & 0x3f;
1430 } else {
1431 /* 3-byte code */
1432 *len = 3;
1433 val = (cur[0] & 0xf) << 12;
1434 val |= (cur[1] & 0x3f) << 6;
1435 val |= cur[2] & 0x3f;
1436 }
1437 } else {
1438 /* 2-byte code */
1439 *len = 2;
1440 val = (cur[0] & 0x1f) << 6;
1441 val |= cur[1] & 0x3f;
1442 }
1443 if (!IS_CHAR(val)) {
1444 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1445 (ctxt->sax->error != NULL))
1446 ctxt->sax->error(ctxt->userData,
1447 "Char 0x%X out of allowed range\n",
1448 val);
1449 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1450 ctxt->wellFormed = 0;
1451 ctxt->disableSAX = 1;
1452 }
1453 return (val);
1454 } else {
1455 /* 1-byte code */
1456 *len = 1;
1457 return ((int) *cur);
1458 }
Owen Taylor3473f882001-02-23 17:55:21 +00001459 }
1460 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001461 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001463 * XML constructs only use < 128 chars
1464 */
1465 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001466 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001467encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001468
Owen Taylor3473f882001-02-23 17:55:21 +00001469 /*
1470 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001471 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001472 * declaration header. Report the error and switch the encoding
1473 * to ISO-Latin-1 (if you don't like this policy, just declare the
1474 * encoding !)
1475 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001476 if (ctxt != NULL) {
1477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1478 ctxt->sax->error(ctxt->userData,
1479 "Input is not proper UTF-8, indicate encoding !\n");
1480 ctxt->sax->error(ctxt->userData,
1481 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1482 ctxt->input->cur[0], ctxt->input->cur[1],
1483 ctxt->input->cur[2], ctxt->input->cur[3]);
1484 }
1485 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001486 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001487 }
Owen Taylor3473f882001-02-23 17:55:21 +00001488
1489 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001490 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001491}
1492
1493/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001494 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001495 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001496 * @val: the char value
1497 *
1498 * append the char value in the array
1499 *
1500 * Returns the number of xmlChar written
1501 */
Owen Taylor3473f882001-02-23 17:55:21 +00001502int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001503xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001504 /*
1505 * We are supposed to handle UTF8, check it's valid
1506 * From rfc2044: encoding of the Unicode values on UTF-8:
1507 *
1508 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1509 * 0000 0000-0000 007F 0xxxxxxx
1510 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1511 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1512 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001513 if (val >= 0x80) {
1514 xmlChar *savedout = out;
1515 int bits;
1516 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1517 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1518 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1519 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001520 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001521 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001522 val);
1523 return(0);
1524 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001525 for ( ; bits >= 0; bits-= 6)
1526 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1527 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001528 }
1529 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001530 return 1;
1531}
1532
1533/**
1534 * xmlCopyChar:
1535 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001536 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001537 * @val: the char value
1538 *
1539 * append the char value in the array
1540 *
1541 * Returns the number of xmlChar written
1542 */
1543
1544int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001545xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001546 /* the len parameter is ignored */
1547 if (val >= 0x80) {
1548 return(xmlCopyCharMultiByte (out, val));
1549 }
1550 *out = (xmlChar) val;
1551 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001552}
1553
1554/************************************************************************
1555 * *
1556 * Commodity functions to switch encodings *
1557 * *
1558 ************************************************************************/
1559
1560/**
1561 * xmlSwitchEncoding:
1562 * @ctxt: the parser context
1563 * @enc: the encoding value (number)
1564 *
1565 * change the input functions when discovering the character encoding
1566 * of a given entity.
1567 *
1568 * Returns 0 in case of success, -1 otherwise
1569 */
1570int
1571xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1572{
1573 xmlCharEncodingHandlerPtr handler;
1574
1575 switch (enc) {
1576 case XML_CHAR_ENCODING_ERROR:
1577 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1579 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1580 ctxt->wellFormed = 0;
1581 ctxt->disableSAX = 1;
1582 break;
1583 case XML_CHAR_ENCODING_NONE:
1584 /* let's assume it's UTF-8 without the XML decl */
1585 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1586 return(0);
1587 case XML_CHAR_ENCODING_UTF8:
1588 /* default encoding, no conversion should be needed */
1589 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001590
1591 /*
1592 * Errata on XML-1.0 June 20 2001
1593 * Specific handling of the Byte Order Mark for
1594 * UTF-8
1595 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001596 if ((ctxt->input != NULL) &&
1597 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001598 (ctxt->input->cur[1] == 0xBB) &&
1599 (ctxt->input->cur[2] == 0xBF)) {
1600 ctxt->input->cur += 3;
1601 }
Owen Taylor3473f882001-02-23 17:55:21 +00001602 return(0);
1603 default:
1604 break;
1605 }
1606 handler = xmlGetCharEncodingHandler(enc);
1607 if (handler == NULL) {
1608 /*
1609 * Default handlers.
1610 */
1611 switch (enc) {
1612 case XML_CHAR_ENCODING_ERROR:
1613 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1616 ctxt->wellFormed = 0;
1617 ctxt->disableSAX = 1;
1618 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1619 break;
1620 case XML_CHAR_ENCODING_NONE:
1621 /* let's assume it's UTF-8 without the XML decl */
1622 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1623 return(0);
1624 case XML_CHAR_ENCODING_UTF8:
1625 case XML_CHAR_ENCODING_ASCII:
1626 /* default encoding, no conversion should be needed */
1627 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1628 return(0);
1629 case XML_CHAR_ENCODING_UTF16LE:
1630 break;
1631 case XML_CHAR_ENCODING_UTF16BE:
1632 break;
1633 case XML_CHAR_ENCODING_UCS4LE:
1634 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636 ctxt->sax->error(ctxt->userData,
1637 "char encoding USC4 little endian not supported\n");
1638 break;
1639 case XML_CHAR_ENCODING_UCS4BE:
1640 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642 ctxt->sax->error(ctxt->userData,
1643 "char encoding USC4 big endian not supported\n");
1644 break;
1645 case XML_CHAR_ENCODING_EBCDIC:
1646 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData,
1649 "char encoding EBCDIC not supported\n");
1650 break;
1651 case XML_CHAR_ENCODING_UCS4_2143:
1652 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654 ctxt->sax->error(ctxt->userData,
1655 "char encoding UCS4 2143 not supported\n");
1656 break;
1657 case XML_CHAR_ENCODING_UCS4_3412:
1658 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "char encoding UCS4 3412 not supported\n");
1662 break;
1663 case XML_CHAR_ENCODING_UCS2:
1664 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666 ctxt->sax->error(ctxt->userData,
1667 "char encoding UCS2 not supported\n");
1668 break;
1669 case XML_CHAR_ENCODING_8859_1:
1670 case XML_CHAR_ENCODING_8859_2:
1671 case XML_CHAR_ENCODING_8859_3:
1672 case XML_CHAR_ENCODING_8859_4:
1673 case XML_CHAR_ENCODING_8859_5:
1674 case XML_CHAR_ENCODING_8859_6:
1675 case XML_CHAR_ENCODING_8859_7:
1676 case XML_CHAR_ENCODING_8859_8:
1677 case XML_CHAR_ENCODING_8859_9:
1678 /*
1679 * We used to keep the internal content in the
1680 * document encoding however this turns being unmaintainable
1681 * So xmlGetCharEncodingHandler() will return non-null
1682 * values for this now.
1683 */
1684 if ((ctxt->inputNr == 1) &&
1685 (ctxt->encoding == NULL) &&
1686 (ctxt->input->encoding != NULL)) {
1687 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1688 }
1689 ctxt->charset = enc;
1690 return(0);
1691 case XML_CHAR_ENCODING_2022_JP:
1692 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1694 ctxt->sax->error(ctxt->userData,
1695 "char encoding ISO-2022-JPnot supported\n");
1696 break;
1697 case XML_CHAR_ENCODING_SHIFT_JIS:
1698 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1700 ctxt->sax->error(ctxt->userData,
1701 "char encoding Shift_JIS not supported\n");
1702 break;
1703 case XML_CHAR_ENCODING_EUC_JP:
1704 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706 ctxt->sax->error(ctxt->userData,
1707 "char encoding EUC-JPnot supported\n");
1708 break;
1709 }
1710 }
1711 if (handler == NULL)
1712 return(-1);
1713 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1714 return(xmlSwitchToEncoding(ctxt, handler));
1715}
1716
1717/**
1718 * xmlSwitchToEncoding:
1719 * @ctxt: the parser context
1720 * @handler: the encoding handler
1721 *
1722 * change the input functions when discovering the character encoding
1723 * of a given entity.
1724 *
1725 * Returns 0 in case of success, -1 otherwise
1726 */
1727int
1728xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1729{
1730 int nbchars;
1731
1732 if (handler != NULL) {
1733 if (ctxt->input != NULL) {
1734 if (ctxt->input->buf != NULL) {
1735 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001736 /*
1737 * Check in case the auto encoding detetection triggered
1738 * in already.
1739 */
Owen Taylor3473f882001-02-23 17:55:21 +00001740 if (ctxt->input->buf->encoder == handler)
1741 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001742
1743 /*
1744 * "UTF-16" can be used for both LE and BE
1745 */
1746 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1747 BAD_CAST "UTF-16", 6)) &&
1748 (!xmlStrncmp(BAD_CAST handler->name,
1749 BAD_CAST "UTF-16", 6))) {
1750 return(0);
1751 }
1752
Owen Taylor3473f882001-02-23 17:55:21 +00001753 /*
1754 * Note: this is a bit dangerous, but that's what it
1755 * takes to use nearly compatible signature for different
1756 * encodings.
1757 */
1758 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1759 ctxt->input->buf->encoder = handler;
1760 return(0);
1761 }
1762 ctxt->input->buf->encoder = handler;
1763
1764 /*
1765 * Is there already some content down the pipe to convert ?
1766 */
1767 if ((ctxt->input->buf->buffer != NULL) &&
1768 (ctxt->input->buf->buffer->use > 0)) {
1769 int processed;
1770
1771 /*
1772 * Specific handling of the Byte Order Mark for
1773 * UTF-16
1774 */
1775 if ((handler->name != NULL) &&
1776 (!strcmp(handler->name, "UTF-16LE")) &&
1777 (ctxt->input->cur[0] == 0xFF) &&
1778 (ctxt->input->cur[1] == 0xFE)) {
1779 ctxt->input->cur += 2;
1780 }
1781 if ((handler->name != NULL) &&
1782 (!strcmp(handler->name, "UTF-16BE")) &&
1783 (ctxt->input->cur[0] == 0xFE) &&
1784 (ctxt->input->cur[1] == 0xFF)) {
1785 ctxt->input->cur += 2;
1786 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001787 /*
1788 * Errata on XML-1.0 June 20 2001
1789 * Specific handling of the Byte Order Mark for
1790 * UTF-8
1791 */
1792 if ((handler->name != NULL) &&
1793 (!strcmp(handler->name, "UTF-8")) &&
1794 (ctxt->input->cur[0] == 0xEF) &&
1795 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001796 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001797 ctxt->input->cur += 3;
1798 }
Owen Taylor3473f882001-02-23 17:55:21 +00001799
1800 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001801 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001802 * Move it as the raw buffer and create a new input buffer
1803 */
1804 processed = ctxt->input->cur - ctxt->input->base;
1805 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1806 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1807 ctxt->input->buf->buffer = xmlBufferCreate();
1808
1809 if (ctxt->html) {
1810 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001811 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001812 */
1813 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1814 ctxt->input->buf->buffer,
1815 ctxt->input->buf->raw);
1816 } else {
1817 /*
1818 * convert just enough to get
1819 * '<?xml version="1.0" encoding="xxx"?>'
1820 * parsed with the autodetected encoding
1821 * into the parser reading buffer.
1822 */
1823 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1824 ctxt->input->buf->buffer,
1825 ctxt->input->buf->raw);
1826 }
1827 if (nbchars < 0) {
1828 xmlGenericError(xmlGenericErrorContext,
1829 "xmlSwitchToEncoding: encoder error\n");
1830 return(-1);
1831 }
1832 ctxt->input->base =
1833 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001834 ctxt->input->end =
1835 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001836
1837 }
1838 return(0);
1839 } else {
1840 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1841 /*
1842 * When parsing a static memory array one must know the
1843 * size to be able to convert the buffer.
1844 */
1845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1846 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001847 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001848 return(-1);
1849 } else {
1850 int processed;
1851
1852 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001853 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001854 * Move it as the raw buffer and create a new input buffer
1855 */
1856 processed = ctxt->input->cur - ctxt->input->base;
1857
1858 ctxt->input->buf->raw = xmlBufferCreate();
1859 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1860 ctxt->input->length - processed);
1861 ctxt->input->buf->buffer = xmlBufferCreate();
1862
1863 /*
1864 * convert as much as possible of the raw input
1865 * to the parser reading buffer.
1866 */
1867 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1868 ctxt->input->buf->buffer,
1869 ctxt->input->buf->raw);
1870 if (nbchars < 0) {
1871 xmlGenericError(xmlGenericErrorContext,
1872 "xmlSwitchToEncoding: encoder error\n");
1873 return(-1);
1874 }
1875
1876 /*
1877 * Conversion succeeded, get rid of the old buffer
1878 */
1879 if ((ctxt->input->free != NULL) &&
1880 (ctxt->input->base != NULL))
1881 ctxt->input->free((xmlChar *) ctxt->input->base);
1882 ctxt->input->base =
1883 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001884 ctxt->input->end =
1885 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001886 }
1887 }
1888 } else {
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001891 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001892 return(-1);
1893 }
1894 /*
1895 * The parsing is now done in UTF8 natively
1896 */
1897 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1898 } else
1899 return(-1);
1900 return(0);
1901
1902}
1903
1904/************************************************************************
1905 * *
1906 * Commodity functions to handle entities processing *
1907 * *
1908 ************************************************************************/
1909
1910/**
1911 * xmlFreeInputStream:
1912 * @input: an xmlParserInputPtr
1913 *
1914 * Free up an input stream.
1915 */
1916void
1917xmlFreeInputStream(xmlParserInputPtr input) {
1918 if (input == NULL) return;
1919
1920 if (input->filename != NULL) xmlFree((char *) input->filename);
1921 if (input->directory != NULL) xmlFree((char *) input->directory);
1922 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1923 if (input->version != NULL) xmlFree((char *) input->version);
1924 if ((input->free != NULL) && (input->base != NULL))
1925 input->free((xmlChar *) input->base);
1926 if (input->buf != NULL)
1927 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001928 xmlFree(input);
1929}
1930
1931/**
1932 * xmlNewInputStream:
1933 * @ctxt: an XML parser context
1934 *
1935 * Create a new input stream structure
1936 * Returns the new input stream or NULL
1937 */
1938xmlParserInputPtr
1939xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1940 xmlParserInputPtr input;
1941
1942 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1943 if (input == NULL) {
1944 if (ctxt != NULL) {
1945 ctxt->errNo = XML_ERR_NO_MEMORY;
1946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1947 ctxt->sax->error(ctxt->userData,
1948 "malloc: couldn't allocate a new input stream\n");
1949 ctxt->errNo = XML_ERR_NO_MEMORY;
1950 }
1951 return(NULL);
1952 }
1953 memset(input, 0, sizeof(xmlParserInput));
1954 input->line = 1;
1955 input->col = 1;
1956 input->standalone = -1;
1957 return(input);
1958}
1959
1960/**
1961 * xmlNewIOInputStream:
1962 * @ctxt: an XML parser context
1963 * @input: an I/O Input
1964 * @enc: the charset encoding if known
1965 *
1966 * Create a new input stream structure encapsulating the @input into
1967 * a stream suitable for the parser.
1968 *
1969 * Returns the new input stream or NULL
1970 */
1971xmlParserInputPtr
1972xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1973 xmlCharEncoding enc) {
1974 xmlParserInputPtr inputStream;
1975
1976 if (xmlParserDebugEntities)
1977 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1978 inputStream = xmlNewInputStream(ctxt);
1979 if (inputStream == NULL) {
1980 return(NULL);
1981 }
1982 inputStream->filename = NULL;
1983 inputStream->buf = input;
1984 inputStream->base = inputStream->buf->buffer->content;
1985 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001986 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001987 if (enc != XML_CHAR_ENCODING_NONE) {
1988 xmlSwitchEncoding(ctxt, enc);
1989 }
1990
1991 return(inputStream);
1992}
1993
1994/**
1995 * xmlNewEntityInputStream:
1996 * @ctxt: an XML parser context
1997 * @entity: an Entity pointer
1998 *
1999 * Create a new input stream based on an xmlEntityPtr
2000 *
2001 * Returns the new input stream or NULL
2002 */
2003xmlParserInputPtr
2004xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2005 xmlParserInputPtr input;
2006
2007 if (entity == NULL) {
2008 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2010 ctxt->sax->error(ctxt->userData,
2011 "internal: xmlNewEntityInputStream entity = NULL\n");
2012 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2013 return(NULL);
2014 }
2015 if (xmlParserDebugEntities)
2016 xmlGenericError(xmlGenericErrorContext,
2017 "new input from entity: %s\n", entity->name);
2018 if (entity->content == NULL) {
2019 switch (entity->etype) {
2020 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2021 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData,
2024 "xmlNewEntityInputStream unparsed entity !\n");
2025 break;
2026 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2027 case XML_EXTERNAL_PARAMETER_ENTITY:
2028 return(xmlLoadExternalEntity((char *) entity->URI,
2029 (char *) entity->ExternalID, ctxt));
2030 case XML_INTERNAL_GENERAL_ENTITY:
2031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032 ctxt->sax->error(ctxt->userData,
2033 "Internal entity %s without content !\n", entity->name);
2034 break;
2035 case XML_INTERNAL_PARAMETER_ENTITY:
2036 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038 ctxt->sax->error(ctxt->userData,
2039 "Internal parameter entity %s without content !\n", entity->name);
2040 break;
2041 case XML_INTERNAL_PREDEFINED_ENTITY:
2042 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2044 ctxt->sax->error(ctxt->userData,
2045 "Predefined entity %s without content !\n", entity->name);
2046 break;
2047 }
2048 return(NULL);
2049 }
2050 input = xmlNewInputStream(ctxt);
2051 if (input == NULL) {
2052 return(NULL);
2053 }
2054 input->filename = (char *) entity->URI;
2055 input->base = entity->content;
2056 input->cur = entity->content;
2057 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002058 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002059 return(input);
2060}
2061
2062/**
2063 * xmlNewStringInputStream:
2064 * @ctxt: an XML parser context
2065 * @buffer: an memory buffer
2066 *
2067 * Create a new input stream based on a memory buffer.
2068 * Returns the new input stream
2069 */
2070xmlParserInputPtr
2071xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2072 xmlParserInputPtr input;
2073
2074 if (buffer == NULL) {
2075 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2077 ctxt->sax->error(ctxt->userData,
2078 "internal: xmlNewStringInputStream string = NULL\n");
2079 return(NULL);
2080 }
2081 if (xmlParserDebugEntities)
2082 xmlGenericError(xmlGenericErrorContext,
2083 "new fixed input: %.30s\n", buffer);
2084 input = xmlNewInputStream(ctxt);
2085 if (input == NULL) {
2086 return(NULL);
2087 }
2088 input->base = buffer;
2089 input->cur = buffer;
2090 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002091 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002092 return(input);
2093}
2094
2095/**
2096 * xmlNewInputFromFile:
2097 * @ctxt: an XML parser context
2098 * @filename: the filename to use as entity
2099 *
2100 * Create a new input stream based on a file.
2101 *
2102 * Returns the new input stream or NULL in case of error
2103 */
2104xmlParserInputPtr
2105xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2106 xmlParserInputBufferPtr buf;
2107 xmlParserInputPtr inputStream;
2108 char *directory = NULL;
2109 xmlChar *URI = NULL;
2110
2111 if (xmlParserDebugEntities)
2112 xmlGenericError(xmlGenericErrorContext,
2113 "new input from file: %s\n", filename);
2114 if (ctxt == NULL) return(NULL);
2115 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2116 if (buf == NULL)
2117 return(NULL);
2118
2119 URI = xmlStrdup((xmlChar *) filename);
2120 directory = xmlParserGetDirectory((const char *) URI);
2121
2122 inputStream = xmlNewInputStream(ctxt);
2123 if (inputStream == NULL) {
2124 if (directory != NULL) xmlFree((char *) directory);
2125 if (URI != NULL) xmlFree((char *) URI);
2126 return(NULL);
2127 }
2128
2129 inputStream->filename = (const char *) URI;
2130 inputStream->directory = directory;
2131 inputStream->buf = buf;
2132
2133 inputStream->base = inputStream->buf->buffer->content;
2134 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002135 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if ((ctxt->directory == NULL) && (directory != NULL))
2137 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2138 return(inputStream);
2139}
2140
2141/************************************************************************
2142 * *
2143 * Commodity functions to handle parser contexts *
2144 * *
2145 ************************************************************************/
2146
2147/**
2148 * xmlInitParserCtxt:
2149 * @ctxt: an XML parser context
2150 *
2151 * Initialize a parser context
2152 */
2153
2154void
2155xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2156{
2157 xmlSAXHandler *sax;
2158
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002159 if(ctxt==NULL) {
2160 xmlGenericError(xmlGenericErrorContext,
2161 "xmlInitParserCtxt: NULL context given\n");
2162 return;
2163 }
2164
Owen Taylor3473f882001-02-23 17:55:21 +00002165 xmlDefaultSAXHandlerInit();
2166
2167 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2168 if (sax == NULL) {
2169 xmlGenericError(xmlGenericErrorContext,
2170 "xmlInitParserCtxt: out of memory\n");
2171 }
2172 else
2173 memset(sax, 0, sizeof(xmlSAXHandler));
2174
2175 /* Allocate the Input stack */
2176 ctxt->inputTab = (xmlParserInputPtr *)
2177 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2178 if (ctxt->inputTab == NULL) {
2179 xmlGenericError(xmlGenericErrorContext,
2180 "xmlInitParserCtxt: out of memory\n");
2181 ctxt->inputNr = 0;
2182 ctxt->inputMax = 0;
2183 ctxt->input = NULL;
2184 return;
2185 }
2186 ctxt->inputNr = 0;
2187 ctxt->inputMax = 5;
2188 ctxt->input = NULL;
2189
2190 ctxt->version = NULL;
2191 ctxt->encoding = NULL;
2192 ctxt->standalone = -1;
2193 ctxt->hasExternalSubset = 0;
2194 ctxt->hasPErefs = 0;
2195 ctxt->html = 0;
2196 ctxt->external = 0;
2197 ctxt->instate = XML_PARSER_START;
2198 ctxt->token = 0;
2199 ctxt->directory = NULL;
2200
2201 /* Allocate the Node stack */
2202 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2203 if (ctxt->nodeTab == NULL) {
2204 xmlGenericError(xmlGenericErrorContext,
2205 "xmlInitParserCtxt: out of memory\n");
2206 ctxt->nodeNr = 0;
2207 ctxt->nodeMax = 0;
2208 ctxt->node = NULL;
2209 ctxt->inputNr = 0;
2210 ctxt->inputMax = 0;
2211 ctxt->input = NULL;
2212 return;
2213 }
2214 ctxt->nodeNr = 0;
2215 ctxt->nodeMax = 10;
2216 ctxt->node = NULL;
2217
2218 /* Allocate the Name stack */
2219 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2220 if (ctxt->nameTab == NULL) {
2221 xmlGenericError(xmlGenericErrorContext,
2222 "xmlInitParserCtxt: out of memory\n");
2223 ctxt->nodeNr = 0;
2224 ctxt->nodeMax = 0;
2225 ctxt->node = NULL;
2226 ctxt->inputNr = 0;
2227 ctxt->inputMax = 0;
2228 ctxt->input = NULL;
2229 ctxt->nameNr = 0;
2230 ctxt->nameMax = 0;
2231 ctxt->name = NULL;
2232 return;
2233 }
2234 ctxt->nameNr = 0;
2235 ctxt->nameMax = 10;
2236 ctxt->name = NULL;
2237
2238 /* Allocate the space stack */
2239 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2240 if (ctxt->spaceTab == NULL) {
2241 xmlGenericError(xmlGenericErrorContext,
2242 "xmlInitParserCtxt: out of memory\n");
2243 ctxt->nodeNr = 0;
2244 ctxt->nodeMax = 0;
2245 ctxt->node = NULL;
2246 ctxt->inputNr = 0;
2247 ctxt->inputMax = 0;
2248 ctxt->input = NULL;
2249 ctxt->nameNr = 0;
2250 ctxt->nameMax = 0;
2251 ctxt->name = NULL;
2252 ctxt->spaceNr = 0;
2253 ctxt->spaceMax = 0;
2254 ctxt->space = NULL;
2255 return;
2256 }
2257 ctxt->spaceNr = 1;
2258 ctxt->spaceMax = 10;
2259 ctxt->spaceTab[0] = -1;
2260 ctxt->space = &ctxt->spaceTab[0];
2261
Daniel Veillard14be0a12001-03-03 18:50:55 +00002262 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002263 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002264
Owen Taylor3473f882001-02-23 17:55:21 +00002265 ctxt->userData = ctxt;
2266 ctxt->myDoc = NULL;
2267 ctxt->wellFormed = 1;
2268 ctxt->valid = 1;
2269 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2270 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2271 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002272 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002273 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002274 if (ctxt->keepBlanks == 0)
2275 sax->ignorableWhitespace = ignorableWhitespace;
2276
Owen Taylor3473f882001-02-23 17:55:21 +00002277 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002278 ctxt->vctxt.error = xmlParserValidityError;
2279 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002280 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (xmlGetWarningsDefaultValue == 0)
2282 ctxt->vctxt.warning = NULL;
2283 else
2284 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002285 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002286 }
2287 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2288 ctxt->record_info = 0;
2289 ctxt->nbChars = 0;
2290 ctxt->checkIndex = 0;
2291 ctxt->inSubset = 0;
2292 ctxt->errNo = XML_ERR_OK;
2293 ctxt->depth = 0;
2294 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002295 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002296 xmlInitNodeInfoSeq(&ctxt->node_seq);
2297}
2298
2299/**
2300 * xmlFreeParserCtxt:
2301 * @ctxt: an XML parser context
2302 *
2303 * Free all the memory used by a parser context. However the parsed
2304 * document in ctxt->myDoc is not freed.
2305 */
2306
2307void
2308xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2309{
2310 xmlParserInputPtr input;
2311 xmlChar *oldname;
2312
2313 if (ctxt == NULL) return;
2314
2315 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2316 xmlFreeInputStream(input);
2317 }
2318 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2319 xmlFree(oldname);
2320 }
2321 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2322 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2323 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2324 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2325 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2326 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2327 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2328 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2329 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002330 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2331 xmlFree(ctxt->sax);
2332 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002333 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002334#ifdef LIBXML_CATALOG_ENABLED
2335 if (ctxt->catalogs != NULL)
2336 xmlCatalogFreeLocal(ctxt->catalogs);
2337#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002338 xmlFree(ctxt);
2339}
2340
2341/**
2342 * xmlNewParserCtxt:
2343 *
2344 * Allocate and initialize a new parser context.
2345 *
2346 * Returns the xmlParserCtxtPtr or NULL
2347 */
2348
2349xmlParserCtxtPtr
2350xmlNewParserCtxt()
2351{
2352 xmlParserCtxtPtr ctxt;
2353
2354 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2355 if (ctxt == NULL) {
2356 xmlGenericError(xmlGenericErrorContext,
2357 "xmlNewParserCtxt : cannot allocate context\n");
2358 perror("malloc");
2359 return(NULL);
2360 }
2361 memset(ctxt, 0, sizeof(xmlParserCtxt));
2362 xmlInitParserCtxt(ctxt);
2363 return(ctxt);
2364}
2365
2366/************************************************************************
2367 * *
2368 * Handling of node informations *
2369 * *
2370 ************************************************************************/
2371
2372/**
2373 * xmlClearParserCtxt:
2374 * @ctxt: an XML parser context
2375 *
2376 * Clear (release owned resources) and reinitialize a parser context
2377 */
2378
2379void
2380xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2381{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002382 if (ctxt==NULL)
2383 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002384 xmlClearNodeInfoSeq(&ctxt->node_seq);
2385 xmlInitParserCtxt(ctxt);
2386}
2387
2388/**
2389 * xmlParserFindNodeInfo:
2390 * @ctxt: an XML parser context
2391 * @node: an XML node within the tree
2392 *
2393 * Find the parser node info struct for a given node
2394 *
2395 * Returns an xmlParserNodeInfo block pointer or NULL
2396 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002397const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2398 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002399{
2400 unsigned long pos;
2401
2402 /* Find position where node should be at */
2403 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002404 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002405 return &ctx->node_seq.buffer[pos];
2406 else
2407 return NULL;
2408}
2409
2410
2411/**
2412 * xmlInitNodeInfoSeq:
2413 * @seq: a node info sequence pointer
2414 *
2415 * -- Initialize (set to initial state) node info sequence
2416 */
2417void
2418xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2419{
2420 seq->length = 0;
2421 seq->maximum = 0;
2422 seq->buffer = NULL;
2423}
2424
2425/**
2426 * xmlClearNodeInfoSeq:
2427 * @seq: a node info sequence pointer
2428 *
2429 * -- Clear (release memory and reinitialize) node
2430 * info sequence
2431 */
2432void
2433xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2434{
2435 if ( seq->buffer != NULL )
2436 xmlFree(seq->buffer);
2437 xmlInitNodeInfoSeq(seq);
2438}
2439
2440
2441/**
2442 * xmlParserFindNodeInfoIndex:
2443 * @seq: a node info sequence pointer
2444 * @node: an XML node pointer
2445 *
2446 *
2447 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2448 * the given node is or should be at in a sorted sequence
2449 *
2450 * Returns a long indicating the position of the record
2451 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002452unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2453 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002454{
2455 unsigned long upper, lower, middle;
2456 int found = 0;
2457
2458 /* Do a binary search for the key */
2459 lower = 1;
2460 upper = seq->length;
2461 middle = 0;
2462 while ( lower <= upper && !found) {
2463 middle = lower + (upper - lower) / 2;
2464 if ( node == seq->buffer[middle - 1].node )
2465 found = 1;
2466 else if ( node < seq->buffer[middle - 1].node )
2467 upper = middle - 1;
2468 else
2469 lower = middle + 1;
2470 }
2471
2472 /* Return position */
2473 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2474 return middle;
2475 else
2476 return middle - 1;
2477}
2478
2479
2480/**
2481 * xmlParserAddNodeInfo:
2482 * @ctxt: an XML parser context
2483 * @info: a node info sequence pointer
2484 *
2485 * Insert node info record into the sorted sequence
2486 */
2487void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002488xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002489 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002490{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002491 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002492
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002493 /* Find pos and check to see if node is already in the sequence */
2494 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2495 info->node);
2496 if (pos < ctxt->node_seq.length
2497 && ctxt->node_seq.buffer[pos].node == info->node) {
2498 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002499 }
2500
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002501 /* Otherwise, we need to add new node to buffer */
2502 else {
2503 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2504 xmlParserNodeInfo *tmp_buffer;
2505 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002506
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002507 if (ctxt->node_seq.maximum == 0)
2508 ctxt->node_seq.maximum = 2;
2509 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2510 (2 * ctxt->node_seq.maximum));
2511
2512 if (ctxt->node_seq.buffer == NULL)
2513 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2514 else
2515 tmp_buffer =
2516 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2517 byte_size);
2518
2519 if (tmp_buffer == NULL) {
2520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2521 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2522 ctxt->errNo = XML_ERR_NO_MEMORY;
2523 return;
2524 }
2525 ctxt->node_seq.buffer = tmp_buffer;
2526 ctxt->node_seq.maximum *= 2;
2527 }
2528
2529 /* If position is not at end, move elements out of the way */
2530 if (pos != ctxt->node_seq.length) {
2531 unsigned long i;
2532
2533 for (i = ctxt->node_seq.length; i > pos; i--)
2534 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2535 }
2536
2537 /* Copy element and increase length */
2538 ctxt->node_seq.buffer[pos] = *info;
2539 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002540 }
Owen Taylor3473f882001-02-23 17:55:21 +00002541}
2542
2543/************************************************************************
2544 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002545 * Defaults settings *
2546 * *
2547 ************************************************************************/
2548/**
2549 * xmlPedanticParserDefault:
2550 * @val: int 0 or 1
2551 *
2552 * Set and return the previous value for enabling pedantic warnings.
2553 *
2554 * Returns the last value for 0 for no substitution, 1 for substitution.
2555 */
2556
2557int
2558xmlPedanticParserDefault(int val) {
2559 int old = xmlPedanticParserDefaultValue;
2560
2561 xmlPedanticParserDefaultValue = val;
2562 return(old);
2563}
2564
2565/**
2566 * xmlLineNumbersDefault:
2567 * @val: int 0 or 1
2568 *
2569 * Set and return the previous value for enabling line numbers in elements
2570 * contents. This may break on old application and is turned off by default.
2571 *
2572 * Returns the last value for 0 for no substitution, 1 for substitution.
2573 */
2574
2575int
2576xmlLineNumbersDefault(int val) {
2577 int old = xmlLineNumbersDefaultValue;
2578
2579 xmlLineNumbersDefaultValue = val;
2580 return(old);
2581}
2582
2583/**
2584 * xmlSubstituteEntitiesDefault:
2585 * @val: int 0 or 1
2586 *
2587 * Set and return the previous value for default entity support.
2588 * Initially the parser always keep entity references instead of substituting
2589 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002590 * default parser behavior
2591 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002592 * file basis.
2593 *
2594 * Returns the last value for 0 for no substitution, 1 for substitution.
2595 */
2596
2597int
2598xmlSubstituteEntitiesDefault(int val) {
2599 int old = xmlSubstituteEntitiesDefaultValue;
2600
2601 xmlSubstituteEntitiesDefaultValue = val;
2602 return(old);
2603}
2604
2605/**
2606 * xmlKeepBlanksDefault:
2607 * @val: int 0 or 1
2608 *
2609 * Set and return the previous value for default blanks text nodes support.
2610 * The 1.x version of the parser used an heuristic to try to detect
2611 * ignorable white spaces. As a result the SAX callback was generating
2612 * ignorableWhitespace() callbacks instead of characters() one, and when
2613 * using the DOM output text nodes containing those blanks were not generated.
2614 * The 2.x and later version will switch to the XML standard way and
2615 * ignorableWhitespace() are only generated when running the parser in
2616 * validating mode and when the current element doesn't allow CDATA or
2617 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002618 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002619 * on 1.X libs and to switch back to the old mode for compatibility when
2620 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2621 * by using xmlIsBlankNode() commodity function to detect the "empty"
2622 * nodes generated.
2623 * This value also affect autogeneration of indentation when saving code
2624 * if blanks sections are kept, indentation is not generated.
2625 *
2626 * Returns the last value for 0 for no substitution, 1 for substitution.
2627 */
2628
2629int
2630xmlKeepBlanksDefault(int val) {
2631 int old = xmlKeepBlanksDefaultValue;
2632
2633 xmlKeepBlanksDefaultValue = val;
2634 xmlIndentTreeOutput = !val;
2635 return(old);
2636}
2637
2638/************************************************************************
2639 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002640 * Deprecated functions kept for compatibility *
2641 * *
2642 ************************************************************************/
2643
/*
 * xmlLangIsLetter:
 * @c:  the character to test
 *
 * Returns non-zero if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise.
 */
static int
xmlLangIsLetter(int c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/*
 * xmlLangSkipLetters:
 * @cur:  pointer into a zero terminated string
 *
 * Consumes a run of one or more ASCII letters, i.e. ([a-z] | [A-Z])+
 *
 * Returns a pointer to the first character after the run, or NULL if
 * @cur does not start with a letter.
 */
static const xmlChar *
xmlLangSkipLetters(const xmlChar *cur) {
    if (!xmlLangIsLetter(cur[0]))
        return(NULL);
    do {
        cur++;
    } while (xmlLangIsLetter(cur[0]));
    return(cur);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The IanaCode and UserCode productions require at least one letter after
 * the "i-" / "x-" prefix, so values such as "i-" or "x--foo" are rejected.
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: prefix followed by one or more letters
         */
        cur = xmlLangSkipLetters(cur + 2);
        if (cur == NULL)
            return(0);
    } else if (xmlLangIsLetter(cur[0]) && xmlLangIsLetter(cur[1])) {
        /*
         * ISO639: exactly two letters. cur[1] is only read once cur[0]
         * is known to be a letter, hence not the terminating zero.
         */
        cur += 2;
    } else {
        return(0);
    }

    /*
     * Optional list of Subcodes, each one introduced by a '-'
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return(0);
        cur = xmlLangSkipLetters(cur + 1);
        if (cur == NULL)
            return(0);
    }
    return(1);
}
2714
2715/**
2716 * xmlDecodeEntities:
2717 * @ctxt: the parser context
2718 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2719 * @len: the len to decode (in bytes !), -1 for no size limit
2720 * @end: an end marker xmlChar, 0 if none
2721 * @end2: an end marker xmlChar, 0 if none
2722 * @end3: an end marker xmlChar, 0 if none
2723 *
2724 * This function is deprecated, we now always process entities content
2725 * through xmlStringDecodeEntities
2726 *
2727 * TODO: remove it in next major release.
2728 *
2729 * [67] Reference ::= EntityRef | CharRef
2730 *
2731 * [69] PEReference ::= '%' Name ';'
2732 *
2733 * Returns A newly allocated string with the substitution done. The caller
2734 * must deallocate it !
2735 */
2736xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002737xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2738 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002739#if 0
2740 xmlChar *buffer = NULL;
2741 unsigned int buffer_size = 0;
2742 unsigned int nbchars = 0;
2743
2744 xmlChar *current = NULL;
2745 xmlEntityPtr ent;
2746 unsigned int max = (unsigned int) len;
2747 int c,l;
2748#endif
2749
2750 static int deprecated = 0;
2751 if (!deprecated) {
2752 xmlGenericError(xmlGenericErrorContext,
2753 "xmlDecodeEntities() deprecated function reached\n");
2754 deprecated = 1;
2755 }
2756
2757#if 0
2758 if (ctxt->depth > 40) {
2759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2760 ctxt->sax->error(ctxt->userData,
2761 "Detected entity reference loop\n");
2762 ctxt->wellFormed = 0;
2763 ctxt->disableSAX = 1;
2764 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2765 return(NULL);
2766 }
2767
2768 /*
2769 * allocate a translation buffer.
2770 */
2771 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2772 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2773 if (buffer == NULL) {
2774 perror("xmlDecodeEntities: malloc failed");
2775 return(NULL);
2776 }
2777
2778 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002779 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002780 */
2781 GROW;
2782 c = CUR_CHAR(l);
2783 while ((nbchars < max) && (c != end) && /* NOTUSED */
2784 (c != end2) && (c != end3)) {
2785 GROW;
2786 if (c == 0) break;
2787 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2788 int val = xmlParseCharRef(ctxt);
2789 COPY_BUF(0,buffer,nbchars,val);
2790 NEXTL(l);
2791 } else if ((c == '&') && (ctxt->token != '&') &&
2792 (what & XML_SUBSTITUTE_REF)) {
2793 if (xmlParserDebugEntities)
2794 xmlGenericError(xmlGenericErrorContext,
2795 "decoding Entity Reference\n");
2796 ent = xmlParseEntityRef(ctxt);
2797 if ((ent != NULL) &&
2798 (ctxt->replaceEntities != 0)) {
2799 current = ent->content;
2800 while (*current != 0) { /* non input consuming loop */
2801 buffer[nbchars++] = *current++;
2802 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2803 growBuffer(buffer);
2804 }
2805 }
2806 } else if (ent != NULL) {
2807 const xmlChar *cur = ent->name;
2808
2809 buffer[nbchars++] = '&';
2810 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2811 growBuffer(buffer);
2812 }
2813 while (*cur != 0) { /* non input consuming loop */
2814 buffer[nbchars++] = *cur++;
2815 }
2816 buffer[nbchars++] = ';';
2817 }
2818 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2819 /*
2820 * a PEReference induce to switch the entity flow,
2821 * we break here to flush the current set of chars
2822 * parsed if any. We will be called back later.
2823 */
2824 if (xmlParserDebugEntities)
2825 xmlGenericError(xmlGenericErrorContext,
2826 "decoding PE Reference\n");
2827 if (nbchars != 0) break;
2828
2829 xmlParsePEReference(ctxt);
2830
2831 /*
2832 * Pop-up of finished entities.
2833 */
2834 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2835 xmlPopInput(ctxt);
2836
2837 break;
2838 } else {
2839 COPY_BUF(l,buffer,nbchars,c);
2840 NEXTL(l);
2841 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2842 growBuffer(buffer);
2843 }
2844 }
2845 c = CUR_CHAR(l);
2846 }
2847 buffer[nbchars++] = 0;
2848 return(buffer);
2849#endif
2850 return(NULL);
2851}
2852
2853/**
2854 * xmlNamespaceParseNCName:
2855 * @ctxt: an XML parser context
2856 *
2857 * parse an XML namespace name.
2858 *
2859 * TODO: this seems not in use anymore, the namespace handling is done on
2860 * top of the SAX interfaces, i.e. not on raw input.
2861 *
2862 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2863 *
2864 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2865 * CombiningChar | Extender
2866 *
2867 * Returns the namespace name or NULL
2868 */
2869
2870xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002871xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002872#if 0
2873 xmlChar buf[XML_MAX_NAMELEN + 5];
2874 int len = 0, l;
2875 int cur = CUR_CHAR(l);
2876#endif
2877
2878 static int deprecated = 0;
2879 if (!deprecated) {
2880 xmlGenericError(xmlGenericErrorContext,
2881 "xmlNamespaceParseNCName() deprecated function reached\n");
2882 deprecated = 1;
2883 }
2884
2885#if 0
2886 /* load first the value of the char !!! */
2887 GROW;
2888 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2889
2890xmlGenericError(xmlGenericErrorContext,
2891 "xmlNamespaceParseNCName: reached loop 3\n");
2892 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2893 (cur == '.') || (cur == '-') ||
2894 (cur == '_') ||
2895 (IS_COMBINING(cur)) ||
2896 (IS_EXTENDER(cur))) {
2897 COPY_BUF(l,buf,len,cur);
2898 NEXTL(l);
2899 cur = CUR_CHAR(l);
2900 if (len >= XML_MAX_NAMELEN) {
2901 xmlGenericError(xmlGenericErrorContext,
2902 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2903 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2904 (cur == '.') || (cur == '-') ||
2905 (cur == '_') ||
2906 (IS_COMBINING(cur)) ||
2907 (IS_EXTENDER(cur))) {
2908 NEXTL(l);
2909 cur = CUR_CHAR(l);
2910 }
2911 break;
2912 }
2913 }
2914 return(xmlStrndup(buf, len));
2915#endif
2916 return(NULL);
2917}
2918
2919/**
2920 * xmlNamespaceParseQName:
2921 * @ctxt: an XML parser context
2922 * @prefix: a xmlChar **
2923 *
2924 * TODO: this seems not in use anymore, the namespace handling is done on
2925 * top of the SAX interfaces, i.e. not on raw input.
2926 *
2927 * parse an XML qualified name
2928 *
2929 * [NS 5] QName ::= (Prefix ':')? LocalPart
2930 *
2931 * [NS 6] Prefix ::= NCName
2932 *
2933 * [NS 7] LocalPart ::= NCName
2934 *
2935 * Returns the local part, and prefix is updated
2936 * to get the Prefix if any.
2937 */
2938
2939xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002940xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002941
2942 static int deprecated = 0;
2943 if (!deprecated) {
2944 xmlGenericError(xmlGenericErrorContext,
2945 "xmlNamespaceParseQName() deprecated function reached\n");
2946 deprecated = 1;
2947 }
2948
2949#if 0
2950 xmlChar *ret = NULL;
2951
2952 *prefix = NULL;
2953 ret = xmlNamespaceParseNCName(ctxt);
2954 if (RAW == ':') {
2955 *prefix = ret;
2956 NEXT;
2957 ret = xmlNamespaceParseNCName(ctxt);
2958 }
2959
2960 return(ret);
2961#endif
2962 return(NULL);
2963}
2964
2965/**
2966 * xmlNamespaceParseNSDef:
2967 * @ctxt: an XML parser context
2968 *
2969 * parse a namespace prefix declaration
2970 *
2971 * TODO: this seems not in use anymore, the namespace handling is done on
2972 * top of the SAX interfaces, i.e. not on raw input.
2973 *
2974 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2975 *
2976 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2977 *
2978 * Returns the namespace name
2979 */
2980
2981xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002982xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002983 static int deprecated = 0;
2984 if (!deprecated) {
2985 xmlGenericError(xmlGenericErrorContext,
2986 "xmlNamespaceParseNSDef() deprecated function reached\n");
2987 deprecated = 1;
2988 }
2989 return(NULL);
2990#if 0
2991 xmlChar *name = NULL;
2992
2993 if ((RAW == 'x') && (NXT(1) == 'm') &&
2994 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2995 (NXT(4) == 's')) {
2996 SKIP(5);
2997 if (RAW == ':') {
2998 NEXT;
2999 name = xmlNamespaceParseNCName(ctxt);
3000 }
3001 }
3002 return(name);
3003#endif
3004}
3005
3006/**
3007 * xmlParseQuotedString:
3008 * @ctxt: an XML parser context
3009 *
3010 * Parse and return a string between quotes or doublequotes
3011 *
3012 * TODO: Deprecated, to be removed at next drop of binary compatibility
3013 *
3014 * Returns the string parser or NULL.
3015 */
3016xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003017xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003018 static int deprecated = 0;
3019 if (!deprecated) {
3020 xmlGenericError(xmlGenericErrorContext,
3021 "xmlParseQuotedString() deprecated function reached\n");
3022 deprecated = 1;
3023 }
3024 return(NULL);
3025
3026#if 0
3027 xmlChar *buf = NULL;
3028 int len = 0,l;
3029 int size = XML_PARSER_BUFFER_SIZE;
3030 int c;
3031
3032 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3033 if (buf == NULL) {
3034 xmlGenericError(xmlGenericErrorContext,
3035 "malloc of %d byte failed\n", size);
3036 return(NULL);
3037 }
3038xmlGenericError(xmlGenericErrorContext,
3039 "xmlParseQuotedString: reached loop 4\n");
3040 if (RAW == '"') {
3041 NEXT;
3042 c = CUR_CHAR(l);
3043 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3044 if (len + 5 >= size) {
3045 size *= 2;
3046 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3047 if (buf == NULL) {
3048 xmlGenericError(xmlGenericErrorContext,
3049 "realloc of %d byte failed\n", size);
3050 return(NULL);
3051 }
3052 }
3053 COPY_BUF(l,buf,len,c);
3054 NEXTL(l);
3055 c = CUR_CHAR(l);
3056 }
3057 if (c != '"') {
3058 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3060 ctxt->sax->error(ctxt->userData,
3061 "String not closed \"%.50s\"\n", buf);
3062 ctxt->wellFormed = 0;
3063 ctxt->disableSAX = 1;
3064 } else {
3065 NEXT;
3066 }
3067 } else if (RAW == '\''){
3068 NEXT;
3069 c = CUR;
3070 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3071 if (len + 1 >= size) {
3072 size *= 2;
3073 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3074 if (buf == NULL) {
3075 xmlGenericError(xmlGenericErrorContext,
3076 "realloc of %d byte failed\n", size);
3077 return(NULL);
3078 }
3079 }
3080 buf[len++] = c;
3081 NEXT;
3082 c = CUR;
3083 }
3084 if (RAW != '\'') {
3085 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3087 ctxt->sax->error(ctxt->userData,
3088 "String not closed \"%.50s\"\n", buf);
3089 ctxt->wellFormed = 0;
3090 ctxt->disableSAX = 1;
3091 } else {
3092 NEXT;
3093 }
3094 }
3095 return(buf);
3096#endif
3097}
3098
3099/**
3100 * xmlParseNamespace:
3101 * @ctxt: an XML parser context
3102 *
3103 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3104 *
3105 * This is what the older xml-name Working Draft specified, a bunch of
3106 * other stuff may still rely on it, so support is still here as
3107 * if it was declared on the root of the Tree:-(
3108 *
3109 * TODO: remove from library
3110 *
3111 * To be removed at next drop of binary compatibility
3112 */
3113
3114void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003115xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003116 static int deprecated = 0;
3117 if (!deprecated) {
3118 xmlGenericError(xmlGenericErrorContext,
3119 "xmlParseNamespace() deprecated function reached\n");
3120 deprecated = 1;
3121 }
3122
3123#if 0
3124 xmlChar *href = NULL;
3125 xmlChar *prefix = NULL;
3126 int garbage = 0;
3127
3128 /*
3129 * We just skipped "namespace" or "xml:namespace"
3130 */
3131 SKIP_BLANKS;
3132
3133xmlGenericError(xmlGenericErrorContext,
3134 "xmlParseNamespace: reached loop 5\n");
3135 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3136 /*
3137 * We can have "ns" or "prefix" attributes
3138 * Old encoding as 'href' or 'AS' attributes is still supported
3139 */
3140 if ((RAW == 'n') && (NXT(1) == 's')) {
3141 garbage = 0;
3142 SKIP(2);
3143 SKIP_BLANKS;
3144
3145 if (RAW != '=') continue;
3146 NEXT;
3147 SKIP_BLANKS;
3148
3149 href = xmlParseQuotedString(ctxt);
3150 SKIP_BLANKS;
3151 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3152 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3153 garbage = 0;
3154 SKIP(4);
3155 SKIP_BLANKS;
3156
3157 if (RAW != '=') continue;
3158 NEXT;
3159 SKIP_BLANKS;
3160
3161 href = xmlParseQuotedString(ctxt);
3162 SKIP_BLANKS;
3163 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3164 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3165 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3166 garbage = 0;
3167 SKIP(6);
3168 SKIP_BLANKS;
3169
3170 if (RAW != '=') continue;
3171 NEXT;
3172 SKIP_BLANKS;
3173
3174 prefix = xmlParseQuotedString(ctxt);
3175 SKIP_BLANKS;
3176 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3177 garbage = 0;
3178 SKIP(2);
3179 SKIP_BLANKS;
3180
3181 if (RAW != '=') continue;
3182 NEXT;
3183 SKIP_BLANKS;
3184
3185 prefix = xmlParseQuotedString(ctxt);
3186 SKIP_BLANKS;
3187 } else if ((RAW == '?') && (NXT(1) == '>')) {
3188 garbage = 0;
3189 NEXT;
3190 } else {
3191 /*
3192 * Found garbage when parsing the namespace
3193 */
3194 if (!garbage) {
3195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3196 ctxt->sax->error(ctxt->userData,
3197 "xmlParseNamespace found garbage\n");
3198 }
3199 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3200 ctxt->wellFormed = 0;
3201 ctxt->disableSAX = 1;
3202 NEXT;
3203 }
3204 }
3205
3206 MOVETO_ENDTAG(CUR_PTR);
3207 NEXT;
3208
3209 /*
3210 * Register the DTD.
3211 if (href != NULL)
3212 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3213 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3214 */
3215
3216 if (prefix != NULL) xmlFree(prefix);
3217 if (href != NULL) xmlFree(href);
3218#endif
3219}
3220
3221/**
3222 * xmlScanName:
3223 * @ctxt: an XML parser context
3224 *
3225 * Trickery: parse an XML name but without consuming the input flow
3226 * Needed for rollback cases. Used only when parsing entities references.
3227 *
3228 * TODO: seems deprecated now, only used in the default part of
3229 * xmlParserHandleReference
3230 *
3231 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3232 * CombiningChar | Extender
3233 *
3234 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3235 *
3236 * [6] Names ::= Name (S Name)*
3237 *
3238 * Returns the Name parsed or NULL
3239 */
3240
3241xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003242xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003243 static int deprecated = 0;
3244 if (!deprecated) {
3245 xmlGenericError(xmlGenericErrorContext,
3246 "xmlScanName() deprecated function reached\n");
3247 deprecated = 1;
3248 }
3249 return(NULL);
3250
3251#if 0
3252 xmlChar buf[XML_MAX_NAMELEN];
3253 int len = 0;
3254
3255 GROW;
3256 if (!IS_LETTER(RAW) && (RAW != '_') &&
3257 (RAW != ':')) {
3258 return(NULL);
3259 }
3260
3261
3262 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3263 (NXT(len) == '.') || (NXT(len) == '-') ||
3264 (NXT(len) == '_') || (NXT(len) == ':') ||
3265 (IS_COMBINING(NXT(len))) ||
3266 (IS_EXTENDER(NXT(len)))) {
3267 GROW;
3268 buf[len] = NXT(len);
3269 len++;
3270 if (len >= XML_MAX_NAMELEN) {
3271 xmlGenericError(xmlGenericErrorContext,
3272 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3273 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3274 (IS_DIGIT(NXT(len))) ||
3275 (NXT(len) == '.') || (NXT(len) == '-') ||
3276 (NXT(len) == '_') || (NXT(len) == ':') ||
3277 (IS_COMBINING(NXT(len))) ||
3278 (IS_EXTENDER(NXT(len))))
3279 len++;
3280 break;
3281 }
3282 }
3283 return(xmlStrndup(buf, len));
3284#endif
3285}
3286
3287/**
3288 * xmlParserHandleReference:
3289 * @ctxt: the parser context
3290 *
3291 * TODO: Remove, now deprecated ... the test is done directly in the
3292 * content parsing
3293 * routines.
3294 *
3295 * [67] Reference ::= EntityRef | CharRef
3296 *
3297 * [68] EntityRef ::= '&' Name ';'
3298 *
3299 * [ WFC: Entity Declared ]
3300 * the Name given in the entity reference must match that in an entity
3301 * declaration, except that well-formed documents need not declare any
3302 * of the following entities: amp, lt, gt, apos, quot.
3303 *
3304 * [ WFC: Parsed Entity ]
3305 * An entity reference must not contain the name of an unparsed entity
3306 *
3307 * [66] CharRef ::= '&#' [0-9]+ ';' |
3308 * '&#x' [0-9a-fA-F]+ ';'
3309 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003310 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003311 * the handling is done accordingly to
3312 * http://www.w3.org/TR/REC-xml#entproc
3313 */
3314void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003315xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003316 static int deprecated = 0;
3317 if (!deprecated) {
3318 xmlGenericError(xmlGenericErrorContext,
3319 "xmlParserHandleReference() deprecated function reached\n");
3320 deprecated = 1;
3321 }
3322
3323#if 0
3324 xmlParserInputPtr input;
3325 xmlChar *name;
3326 xmlEntityPtr ent = NULL;
3327
3328 if (ctxt->token != 0) {
3329 return;
3330 }
3331 if (RAW != '&') return;
3332 GROW;
3333 if ((RAW == '&') && (NXT(1) == '#')) {
3334 switch(ctxt->instate) {
3335 case XML_PARSER_ENTITY_DECL:
3336 case XML_PARSER_PI:
3337 case XML_PARSER_CDATA_SECTION:
3338 case XML_PARSER_COMMENT:
3339 case XML_PARSER_SYSTEM_LITERAL:
3340 /* we just ignore it there */
3341 return;
3342 case XML_PARSER_START_TAG:
3343 return;
3344 case XML_PARSER_END_TAG:
3345 return;
3346 case XML_PARSER_EOF:
3347 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3349 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3350 ctxt->wellFormed = 0;
3351 ctxt->disableSAX = 1;
3352 return;
3353 case XML_PARSER_PROLOG:
3354 case XML_PARSER_START:
3355 case XML_PARSER_MISC:
3356 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3358 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 return;
3362 case XML_PARSER_EPILOG:
3363 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3365 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3366 ctxt->wellFormed = 0;
3367 ctxt->disableSAX = 1;
3368 return;
3369 case XML_PARSER_DTD:
3370 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3372 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003373 "CharRef are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003374 ctxt->wellFormed = 0;
3375 ctxt->disableSAX = 1;
3376 return;
3377 case XML_PARSER_ENTITY_VALUE:
3378 /*
3379 * NOTE: in the case of entity values, we don't do the
3380 * substitution here since we need the literal
3381 * entity value to be able to save the internal
3382 * subset of the document.
3383 * This will be handled by xmlStringDecodeEntities
3384 */
3385 return;
3386 case XML_PARSER_CONTENT:
3387 return;
3388 case XML_PARSER_ATTRIBUTE_VALUE:
3389 /* ctxt->token = xmlParseCharRef(ctxt); */
3390 return;
3391 case XML_PARSER_IGNORE:
3392 return;
3393 }
3394 return;
3395 }
3396
3397 switch(ctxt->instate) {
3398 case XML_PARSER_CDATA_SECTION:
3399 return;
3400 case XML_PARSER_PI:
3401 case XML_PARSER_COMMENT:
3402 case XML_PARSER_SYSTEM_LITERAL:
3403 case XML_PARSER_CONTENT:
3404 return;
3405 case XML_PARSER_START_TAG:
3406 return;
3407 case XML_PARSER_END_TAG:
3408 return;
3409 case XML_PARSER_EOF:
3410 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3412 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 return;
3416 case XML_PARSER_PROLOG:
3417 case XML_PARSER_START:
3418 case XML_PARSER_MISC:
3419 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3421 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3422 ctxt->wellFormed = 0;
3423 ctxt->disableSAX = 1;
3424 return;
3425 case XML_PARSER_EPILOG:
3426 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3428 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3429 ctxt->wellFormed = 0;
3430 ctxt->disableSAX = 1;
3431 return;
3432 case XML_PARSER_ENTITY_VALUE:
3433 /*
3434 * NOTE: in the case of entity values, we don't do the
3435 * substitution here since we need the literal
3436 * entity value to be able to save the internal
3437 * subset of the document.
3438 * This will be handled by xmlStringDecodeEntities
3439 */
3440 return;
3441 case XML_PARSER_ATTRIBUTE_VALUE:
3442 /*
3443 * NOTE: in the case of attributes values, we don't do the
3444 * substitution here unless we are in a mode where
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003445 * the parser is explicitly asked to substitute
Owen Taylor3473f882001-02-23 17:55:21 +00003446 * entities. The SAX callback is called with values
3447 * without entity substitution.
3448 * This will then be handled by xmlStringDecodeEntities
3449 */
3450 return;
3451 case XML_PARSER_ENTITY_DECL:
3452 /*
3453 * we just ignore it there
3454 * the substitution will be done once the entity is referenced
3455 */
3456 return;
3457 case XML_PARSER_DTD:
3458 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3460 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003461 "Entity references are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003462 ctxt->wellFormed = 0;
3463 ctxt->disableSAX = 1;
3464 return;
3465 case XML_PARSER_IGNORE:
3466 return;
3467 }
3468
3469/* TODO: this seems not reached anymore .... Verify ... */
3470xmlGenericError(xmlGenericErrorContext,
3471 "Reached deprecated section in xmlParserHandleReference()\n");
3472xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003473 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003474xmlGenericError(xmlGenericErrorContext,
3475 "indicating the version: %s, thanks !\n", xmlParserVersion);
3476 NEXT;
3477 name = xmlScanName(ctxt);
3478 if (name == NULL) {
3479 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3482 ctxt->wellFormed = 0;
3483 ctxt->disableSAX = 1;
3484 ctxt->token = '&';
3485 return;
3486 }
3487 if (NXT(xmlStrlen(name)) != ';') {
3488 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3490 ctxt->sax->error(ctxt->userData,
3491 "Entity reference: ';' expected\n");
3492 ctxt->wellFormed = 0;
3493 ctxt->disableSAX = 1;
3494 ctxt->token = '&';
3495 xmlFree(name);
3496 return;
3497 }
3498 SKIP(xmlStrlen(name) + 1);
3499 if (ctxt->sax != NULL) {
3500 if (ctxt->sax->getEntity != NULL)
3501 ent = ctxt->sax->getEntity(ctxt->userData, name);
3502 }
3503
3504 /*
3505 * [ WFC: Entity Declared ]
3506 * the Name given in the entity reference must match that in an entity
3507 * declaration, except that well-formed documents need not declare any
3508 * of the following entities: amp, lt, gt, apos, quot.
3509 */
3510 if (ent == NULL)
3511 ent = xmlGetPredefinedEntity(name);
3512 if (ent == NULL) {
3513 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3515 ctxt->sax->error(ctxt->userData,
3516 "Entity reference: entity %s not declared\n",
3517 name);
3518 ctxt->wellFormed = 0;
3519 ctxt->disableSAX = 1;
3520 xmlFree(name);
3521 return;
3522 }
3523
3524 /*
3525 * [ WFC: Parsed Entity ]
3526 * An entity reference must not contain the name of an unparsed entity
3527 */
3528 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3529 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3531 ctxt->sax->error(ctxt->userData,
3532 "Entity reference to unparsed entity %s\n", name);
3533 ctxt->wellFormed = 0;
3534 ctxt->disableSAX = 1;
3535 }
3536
3537 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3538 ctxt->token = ent->content[0];
3539 xmlFree(name);
3540 return;
3541 }
3542 input = xmlNewEntityInputStream(ctxt, ent);
3543 xmlPushInput(ctxt, input);
3544 xmlFree(name);
3545#endif
3546 return;
3547}
3548
3549/**
3550 * xmlHandleEntity:
3551 * @ctxt: an XML parser context
3552 * @entity: an XML entity pointer.
3553 *
3554 * Default handling of defined entities, when should we define a new input
3555 * stream ? When do we just handle that as a set of chars ?
3556 *
3557 * OBSOLETE: to be removed at some point.
3558 */
3559
3560void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003561xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003562 static int deprecated = 0;
3563 if (!deprecated) {
3564 xmlGenericError(xmlGenericErrorContext,
3565 "xmlHandleEntity() deprecated function reached\n");
3566 deprecated = 1;
3567 }
3568
3569#if 0
3570 int len;
3571 xmlParserInputPtr input;
3572
3573 if (entity->content == NULL) {
3574 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3576 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3577 entity->name);
3578 ctxt->wellFormed = 0;
3579 ctxt->disableSAX = 1;
3580 return;
3581 }
3582 len = xmlStrlen(entity->content);
3583 if (len <= 2) goto handle_as_char;
3584
3585 /*
3586 * Redefine its content as an input stream.
3587 */
3588 input = xmlNewEntityInputStream(ctxt, entity);
3589 xmlPushInput(ctxt, input);
3590 return;
3591
3592handle_as_char:
3593 /*
3594 * Just handle the content as a set of chars.
3595 */
3596 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3597 (ctxt->sax->characters != NULL))
3598 ctxt->sax->characters(ctxt->userData, entity->content, len);
3599#endif
3600}
3601
3602/**
3603 * xmlNewGlobalNs:
3604 * @doc: the document carrying the namespace
3605 * @href: the URI associated
3606 * @prefix: the prefix for the namespace
3607 *
3608 * Creation of a Namespace, the old way using PI and without scoping
3609 * DEPRECATED !!!
3610 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003611 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003612 */
3613xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003614xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3615 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003616 static int deprecated = 0;
3617 if (!deprecated) {
3618 xmlGenericError(xmlGenericErrorContext,
3619 "xmlNewGlobalNs() deprecated function reached\n");
3620 deprecated = 1;
3621 }
3622 return(NULL);
3623#if 0
3624 xmlNodePtr root;
3625
3626 xmlNsPtr cur;
3627
3628 root = xmlDocGetRootElement(doc);
3629 if (root != NULL)
3630 return(xmlNewNs(root, href, prefix));
3631
3632 /*
3633 * if there is no root element yet, create an old Namespace type
3634 * and it will be moved to the root at save time.
3635 */
3636 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3637 if (cur == NULL) {
3638 xmlGenericError(xmlGenericErrorContext,
3639 "xmlNewGlobalNs : malloc failed\n");
3640 return(NULL);
3641 }
3642 memset(cur, 0, sizeof(xmlNs));
3643 cur->type = XML_GLOBAL_NAMESPACE;
3644
3645 if (href != NULL)
3646 cur->href = xmlStrdup(href);
3647 if (prefix != NULL)
3648 cur->prefix = xmlStrdup(prefix);
3649
3650 /*
3651 * Add it at the end to preserve parsing order ...
3652 */
3653 if (doc != NULL) {
3654 if (doc->oldNs == NULL) {
3655 doc->oldNs = cur;
3656 } else {
3657 xmlNsPtr prev = doc->oldNs;
3658
3659 while (prev->next != NULL) prev = prev->next;
3660 prev->next = cur;
3661 }
3662 }
3663
3664 return(NULL);
3665#endif
3666}
3667
3668/**
3669 * xmlUpgradeOldNs:
3670 * @doc: a document pointer
3671 *
3672 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3673 * DEPRECATED
3674 */
3675void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003676xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003677 static int deprecated = 0;
3678 if (!deprecated) {
3679 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003680 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003681 deprecated = 1;
3682 }
3683#if 0
3684 xmlNsPtr cur;
3685
3686 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3687 if (doc->children == NULL) {
3688#ifdef DEBUG_TREE
3689 xmlGenericError(xmlGenericErrorContext,
3690 "xmlUpgradeOldNs: failed no root !\n");
3691#endif
3692 return;
3693 }
3694
3695 cur = doc->oldNs;
3696 while (cur->next != NULL) {
3697 cur->type = XML_LOCAL_NAMESPACE;
3698 cur = cur->next;
3699 }
3700 cur->type = XML_LOCAL_NAMESPACE;
3701 cur->next = doc->children->nsDef;
3702 doc->children->nsDef = doc->oldNs;
3703 doc->oldNs = NULL;
3704#endif
3705}
3706