blob: cdf8dcb1c75bbb9b749c28b4df965979e87657fa [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Daniel Veillard3c5ed912002-01-08 10:36:16 +000012#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000053#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000054
/* Forward declaration; no definition of this function appears before this point in the file. */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000055void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000056
Daniel Veillarda53c6882001-07-25 17:18:57 +000057/*
 * Various global defaults for parsing
 *
 * Only when VMS is defined: the two defaults are defined here, both
 * initialised to 0, under names ending in "Val", and each "...Value" name
 * is mapped onto its "...Val" counterpart by a macro.
 * NOTE(review): presumably the shorter names work around a symbol-length
 * limit on VMS - confirm.
 */
Daniel Veillarda53c6882001-07-25 17:18:57 +000060#ifdef VMS
61int xmlSubstituteEntitiesDefaultVal = 0;
62#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
63int xmlDoValidityCheckingDefaultVal = 0;
64#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
Daniel Veillarda53c6882001-07-25 17:18:57 +000065#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066
Daniel Veillard5e2dace2001-07-18 19:30:27 +000067/**
Owen Taylor3473f882001-02-23 17:55:21 +000068 * xmlCheckVersion:
69 * @version: the include version number
70 *
71 * check the compiled lib version against the include one.
72 * This can warn or immediately kill the application
73 */
74void
75xmlCheckVersion(int version) {
76 int myversion = (int) LIBXML_VERSION;
77
Daniel Veillard6f350292001-10-14 09:56:15 +000078 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000079
Owen Taylor3473f882001-02-23 17:55:21 +000080 if ((myversion / 10000) != (version / 10000)) {
81 xmlGenericError(xmlGenericErrorContext,
82 "Fatal: program compiled against libxml %d using libxml %d\n",
83 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000084 fprintf(stderr,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000087 }
88 if ((myversion / 100) < (version / 100)) {
89 xmlGenericError(xmlGenericErrorContext,
90 "Warning: program compiled against libxml %d using older %d\n",
91 (version / 100), (myversion / 100));
92 }
93}
94
95
/*
 * Names of the features known to the feature API, in the order in which
 * xmlGetFeaturesList() reports them.  Both the strings and the table
 * itself are read-only.
 */
static const char * const xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * If @len or @result is NULL nothing is copied and the total number of
 * features is returned, which lets a caller size its array first.
 *
 * Returns -1 in case of error (*@len negative, or 1000 and above),
 *         otherwise the total number of features; *@len is updated with
 *         the number of strings actually copied.  The strings belong to
 *         the library and must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int i;

    /* query mode: only report how many features exist */
    if ((len == NULL) || (result == NULL))
        return(total);

    /* reject nonsensical array sizes */
    if ((*len < 0) || (*len >= 1000))
        return(-1);

    if (*len > total)
        *len = total;
    for (i = 0; i < *len; i++)
        result[i] = xmlFeaturesList[i];
    return(total);
}
167
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000168/**
Owen Taylor3473f882001-02-23 17:55:21 +0000169 * xmlGetFeature:
170 * @ctxt: an XML/HTML parser context
171 * @name: the feature name
172 * @result: location to store the result
173 *
174 * Read the current value of one feature of this parser instance
175 *
176 * Returns -1 in case or error, 0 otherwise
177 */
178int
179xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181 return(-1);
182
183 if (!strcmp(name, "validate")) {
184 *((int *) result) = ctxt->validate;
185 } else if (!strcmp(name, "keep blanks")) {
186 *((int *) result) = ctxt->keepBlanks;
187 } else if (!strcmp(name, "disable SAX")) {
188 *((int *) result) = ctxt->disableSAX;
189 } else if (!strcmp(name, "fetch external entities")) {
190 *((int *) result) = ctxt->loadsubset;
191 } else if (!strcmp(name, "substitute entities")) {
192 *((int *) result) = ctxt->replaceEntities;
193 } else if (!strcmp(name, "gather line info")) {
194 *((int *) result) = ctxt->record_info;
195 } else if (!strcmp(name, "user data")) {
196 *((void **)result) = ctxt->userData;
197 } else if (!strcmp(name, "is html")) {
198 *((int *) result) = ctxt->html;
199 } else if (!strcmp(name, "is standalone")) {
200 *((int *) result) = ctxt->standalone;
201 } else if (!strcmp(name, "document")) {
202 *((xmlDocPtr *) result) = ctxt->myDoc;
203 } else if (!strcmp(name, "is well formed")) {
204 *((int *) result) = ctxt->wellFormed;
205 } else if (!strcmp(name, "is valid")) {
206 *((int *) result) = ctxt->valid;
207 } else if (!strcmp(name, "SAX block")) {
208 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209 } else if (!strcmp(name, "SAX function internalSubset")) {
210 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211 } else if (!strcmp(name, "SAX function isStandalone")) {
212 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217 } else if (!strcmp(name, "SAX function resolveEntity")) {
218 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219 } else if (!strcmp(name, "SAX function getEntity")) {
220 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221 } else if (!strcmp(name, "SAX function entityDecl")) {
222 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223 } else if (!strcmp(name, "SAX function notationDecl")) {
224 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225 } else if (!strcmp(name, "SAX function attributeDecl")) {
226 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227 } else if (!strcmp(name, "SAX function elementDecl")) {
228 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233 } else if (!strcmp(name, "SAX function startDocument")) {
234 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235 } else if (!strcmp(name, "SAX function endDocument")) {
236 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237 } else if (!strcmp(name, "SAX function startElement")) {
238 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239 } else if (!strcmp(name, "SAX function endElement")) {
240 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241 } else if (!strcmp(name, "SAX function reference")) {
242 *((referenceSAXFunc *) result) = ctxt->sax->reference;
243 } else if (!strcmp(name, "SAX function characters")) {
244 *((charactersSAXFunc *) result) = ctxt->sax->characters;
245 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247 } else if (!strcmp(name, "SAX function processingInstruction")) {
248 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249 } else if (!strcmp(name, "SAX function comment")) {
250 *((commentSAXFunc *) result) = ctxt->sax->comment;
251 } else if (!strcmp(name, "SAX function warning")) {
252 *((warningSAXFunc *) result) = ctxt->sax->warning;
253 } else if (!strcmp(name, "SAX function error")) {
254 *((errorSAXFunc *) result) = ctxt->sax->error;
255 } else if (!strcmp(name, "SAX function fatalError")) {
256 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257 } else if (!strcmp(name, "SAX function getParameterEntity")) {
258 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259 } else if (!strcmp(name, "SAX function cdataBlock")) {
260 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261 } else if (!strcmp(name, "SAX function externalSubset")) {
262 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263 } else {
264 return(-1);
265 }
266 return(0);
267}
268
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000269/**
Owen Taylor3473f882001-02-23 17:55:21 +0000270 * xmlSetFeature:
271 * @ctxt: an XML/HTML parser context
272 * @name: the feature name
273 * @value: pointer to the location of the new value
274 *
275 * Change the current value of one feature of this parser instance
276 *
277 * Returns -1 in case or error, 0 otherwise
278 */
279int
280xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282 return(-1);
283
284 if (!strcmp(name, "validate")) {
285 int newvalidate = *((int *) value);
286 if ((!ctxt->validate) && (newvalidate != 0)) {
287 if (ctxt->vctxt.warning == NULL)
288 ctxt->vctxt.warning = xmlParserValidityWarning;
289 if (ctxt->vctxt.error == NULL)
290 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000291 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000292 }
293 ctxt->validate = newvalidate;
294 } else if (!strcmp(name, "keep blanks")) {
295 ctxt->keepBlanks = *((int *) value);
296 } else if (!strcmp(name, "disable SAX")) {
297 ctxt->disableSAX = *((int *) value);
298 } else if (!strcmp(name, "fetch external entities")) {
299 ctxt->loadsubset = *((int *) value);
300 } else if (!strcmp(name, "substitute entities")) {
301 ctxt->replaceEntities = *((int *) value);
302 } else if (!strcmp(name, "gather line info")) {
303 ctxt->record_info = *((int *) value);
304 } else if (!strcmp(name, "user data")) {
305 ctxt->userData = *((void **)value);
306 } else if (!strcmp(name, "is html")) {
307 ctxt->html = *((int *) value);
308 } else if (!strcmp(name, "is standalone")) {
309 ctxt->standalone = *((int *) value);
310 } else if (!strcmp(name, "document")) {
311 ctxt->myDoc = *((xmlDocPtr *) value);
312 } else if (!strcmp(name, "is well formed")) {
313 ctxt->wellFormed = *((int *) value);
314 } else if (!strcmp(name, "is valid")) {
315 ctxt->valid = *((int *) value);
316 } else if (!strcmp(name, "SAX block")) {
317 ctxt->sax = *((xmlSAXHandlerPtr *) value);
318 } else if (!strcmp(name, "SAX function internalSubset")) {
319 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function isStandalone")) {
321 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
323 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
325 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function resolveEntity")) {
327 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function getEntity")) {
329 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
330 } else if (!strcmp(name, "SAX function entityDecl")) {
331 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function notationDecl")) {
333 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function attributeDecl")) {
335 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function elementDecl")) {
337 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
339 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
341 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startDocument")) {
343 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endDocument")) {
345 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function startElement")) {
347 ctxt->sax->startElement = *((startElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function endElement")) {
349 ctxt->sax->endElement = *((endElementSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function reference")) {
351 ctxt->sax->reference = *((referenceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function characters")) {
353 ctxt->sax->characters = *((charactersSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
355 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function processingInstruction")) {
357 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function comment")) {
359 ctxt->sax->comment = *((commentSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function warning")) {
361 ctxt->sax->warning = *((warningSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function error")) {
363 ctxt->sax->error = *((errorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function fatalError")) {
365 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function getParameterEntity")) {
367 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
368 } else if (!strcmp(name, "SAX function cdataBlock")) {
369 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
370 } else if (!strcmp(name, "SAX function externalSubset")) {
371 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
372 } else {
373 return(-1);
374 }
375 return(0);
376}
377
378/************************************************************************
379 * *
380 * Some functions to avoid too large macros *
381 * *
382 ************************************************************************/
383
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE, and
 * FFFF.  Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are allowed */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF are excluded, the supplementary planes are allowed */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
404
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the four allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
419
/*
 * Direct lookup table for the code points 0x00 - 0xFF: 1 when the code
 * point is a BaseChar, 0 otherwise.
 */
static const unsigned char xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive {first, last} ranges of the BaseChar code points from 0x0100
 * upward.  The entries are sorted and do not overlap, which
 * xmlIsBaseChar() relies on for its binary search: keep them that way.
 */
static const unsigned short xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69},
    {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
    {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
    {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
    {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
    {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
    {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
    {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
    {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
    {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
    {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
    {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
    {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
    {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
    {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
    {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
    {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered by a direct table lookup, the
 * others by a binary search in the sorted range table.  Negative values
 * are never BaseChars and are rejected before any table is indexed.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
658
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The ASCII digits are tested first; every other digit block starts at
 * 0x0660 or above and is looked up in a sorted table of inclusive ranges.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    static const unsigned short digitRanges[][2] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
        {0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF},
        {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
        {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const unsigned int count =
        (unsigned int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    unsigned int idx;

    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);

    for (idx = 0; idx < count; idx++) {
        /* the table is sorted: nothing further on can match */
        if (c < digitRanges[idx][0])
            break;
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
688
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted code points are kept in a sorted table of inclusive
 * {first, last} ranges which is scanned until a range contains @c or
 * starts beyond it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    static const unsigned short combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C},
        {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
        {0x0981, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09BE},
        {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E},
        {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42}, {0x0A47, 0x0A48},
        {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
        {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C},
        {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
        {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
        {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48},
        {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C82, 0x0C83},
        {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
        {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
        {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
        {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
        {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
        {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B},
        {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC},
        {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
        {0x309A, 0x309A}
    };
    const unsigned int count =
        (unsigned int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    unsigned int idx;

    for (idx = 0; idx < count; idx++) {
        /* the table is sorted: nothing further on can match */
        if (c < combiningRanges[idx][0])
            break;
        if (c <= combiningRanges[idx][1])
            return(1);
    }
    return(0);
}
801
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Tell whether @c belongs to the XML 1.0 character class
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x309D) && (c <= 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* the isolated code points */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) || (c == 0x0387) ||
        (c == 0x0640) || (c == 0x0E46) || (c == 0x0EC6) || (c == 0x3005))
        return(1);

    return(0);
}
826
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three ranges listed by the production are accepted; in
 * particular the block 0xF900-0xFA2D is not part of production [86]
 * and is rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* accelerator: nothing below 0x0100 is ideographic */
    if (c < 0x0100)
        return(0);

    return((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
           (((c) >= 0x3021) && ((c) <= 0x3029)) ||
           ((c) == 0x3007));
}
844
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Tell whether @c matches the production
 * [84] Letter ::= BaseChar | Ideographic
 * by trying IS_BASECHAR() first and IS_IDEOGRAPHIC() second.
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
858
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Tell whether @c may appear in a public identifier, i.e. matches
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the white space and punctuation listed by the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
881
882/************************************************************************
883 * *
884 * Input handling functions for progressive parsing *
885 * *
886 ************************************************************************/
887
888/* #define DEBUG_INPUT */
889/* #define DEBUG_STACK */
890/* #define DEBUG_PUSH */
891
892
893/* we need to keep enough input to show errors in context */
894#define LINE_LEN 80
895
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined. It checks
 * that the base/cur pointers of @in are consistent with the underlying
 * buffer and reports any mismatch, then dumps the buffer state, all
 * through xmlGenericError().
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the pointer difference as a long:
     * casting a pointer to int truncates it where pointers are wider
     * than int, and a ptrdiff_t must not be passed for %d.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
920
921
922/**
923 * xmlParserInputRead:
924 * @in: an XML parser input
925 * @len: an indicative size for the lookahead
926 *
927 * This function refresh the input for the parser. It doesn't try to
928 * preserve pointers to the input buffer, and discard already read data
929 *
930 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
931 * end of this entity
932 */
933int
934xmlParserInputRead(xmlParserInputPtr in, int len) {
935 int ret;
936 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000937 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000938
939#ifdef DEBUG_INPUT
940 xmlGenericError(xmlGenericErrorContext, "Read\n");
941#endif
942 if (in->buf == NULL) return(-1);
943 if (in->base == NULL) return(-1);
944 if (in->cur == NULL) return(-1);
945 if (in->buf->buffer == NULL) return(-1);
946 if (in->buf->readcallback == NULL) return(-1);
947
948 CHECK_BUFFER(in);
949
950 used = in->cur - in->buf->buffer->content;
951 ret = xmlBufferShrink(in->buf->buffer, used);
952 if (ret > 0) {
953 in->cur -= ret;
954 in->consumed += ret;
955 }
956 ret = xmlParserInputBufferRead(in->buf, len);
957 if (in->base != in->buf->buffer->content) {
958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000959 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000960 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000961 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000962 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000963 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000964 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000965 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000966
967 CHECK_BUFFER(in);
968
969 return(ret);
970}
971
972/**
973 * xmlParserInputGrow:
974 * @in: an XML parser input
975 * @len: an indicative size for the lookahead
976 *
977 * This function increase the input for the parser. It tries to
978 * preserve pointers to the input buffer, and keep already read data
979 *
980 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
981 * end of this entity
982 */
983int
984xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000986 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000987
988#ifdef DEBUG_INPUT
989 xmlGenericError(xmlGenericErrorContext, "Grow\n");
990#endif
991 if (in->buf == NULL) return(-1);
992 if (in->base == NULL) return(-1);
993 if (in->cur == NULL) return(-1);
994 if (in->buf->buffer == NULL) return(-1);
995
996 CHECK_BUFFER(in);
997
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000998 indx = in->cur - in->base;
999 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001000
1001 CHECK_BUFFER(in);
1002
1003 return(0);
1004 }
1005 if (in->buf->readcallback != NULL)
1006 ret = xmlParserInputBufferGrow(in->buf, len);
1007 else
1008 return(0);
1009
1010 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001011 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001012 * block, but we use it really as an integer to do some
1013 * pointer arithmetic. Insure will raise it as a bug but in
1014 * that specific case, that's not !
1015 */
1016 if (in->base != in->buf->buffer->content) {
1017 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001018 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001019 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001020 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001021 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001022 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001023 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001024 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001025
1026 CHECK_BUFFER(in);
1027
1028 return(ret);
1029}
1030
1031/**
1032 * xmlParserInputShrink:
1033 * @in: an XML parser input
1034 *
1035 * This function removes used input for the parser.
1036 */
1037void
1038xmlParserInputShrink(xmlParserInputPtr in) {
1039 int used;
1040 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001041 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001042
1043#ifdef DEBUG_INPUT
1044 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1045#endif
1046 if (in->buf == NULL) return;
1047 if (in->base == NULL) return;
1048 if (in->cur == NULL) return;
1049 if (in->buf->buffer == NULL) return;
1050
1051 CHECK_BUFFER(in);
1052
1053 used = in->cur - in->buf->buffer->content;
1054 /*
1055 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001056 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001057 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001058 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001059 return;
1060 if (used > INPUT_CHUNK) {
1061 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1062 if (ret > 0) {
1063 in->cur -= ret;
1064 in->consumed += ret;
1065 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001066 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001067 }
1068
1069 CHECK_BUFFER(in);
1070
1071 if (in->buf->buffer->use > INPUT_CHUNK) {
1072 return;
1073 }
1074 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1075 if (in->base != in->buf->buffer->content) {
1076 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001077 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001080 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001083 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001084
1085 CHECK_BUFFER(in);
1086}
1087
1088/************************************************************************
1089 * *
1090 * UTF8 character input and related functions *
1091 * *
1092 ************************************************************************/
1093
1094/**
1095 * xmlNextChar:
1096 * @ctxt: the XML parser context
1097 *
1098 * Skip to the next char input char.
1099 */
1100
1101void
1102xmlNextChar(xmlParserCtxtPtr ctxt) {
1103 if (ctxt->instate == XML_PARSER_EOF)
1104 return;
1105
1106 /*
1107 * 2.11 End-of-Line Handling
1108 * the literal two-character sequence "#xD#xA" or a standalone
1109 * literal #xD, an XML processor must pass to the application
1110 * the single character #xA.
1111 */
1112 if (ctxt->token != 0) ctxt->token = 0;
1113 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1114 if ((*ctxt->input->cur == 0) &&
1115 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1116 (ctxt->instate != XML_PARSER_COMMENT)) {
1117 /*
1118 * If we are at the end of the current entity and
1119 * the context allows it, we pop consumed entities
1120 * automatically.
1121 * the auto closing should be blocked in other cases
1122 */
1123 xmlPopInput(ctxt);
1124 } else {
1125 if (*(ctxt->input->cur) == '\n') {
1126 ctxt->input->line++; ctxt->input->col = 1;
1127 } else ctxt->input->col++;
1128 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 const unsigned char *cur = ctxt->input->cur;
1141 unsigned char c;
1142
1143 c = *cur;
1144 if (c & 0x80) {
1145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
1152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
1155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
1157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
1160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n", val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 ctxt->disableSAX = 1;
1185 }
1186 } else
1187 /* 2-byte code */
1188 ctxt->input->cur += 2;
1189 } else
1190 /* 1-byte code */
1191 ctxt->input->cur++;
1192 } else {
1193 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001194 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001195 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001196 * XML constructs only use < 128 chars
1197 */
1198 ctxt->input->cur++;
1199 }
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 } else {
1205 ctxt->input->cur++;
1206 ctxt->nbChars++;
1207 if (*ctxt->input->cur == 0)
1208 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1209 }
1210 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1211 xmlParserHandlePEReference(ctxt);
1212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
1215 return;
1216encoding_error:
1217 /*
1218 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001219 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001220 * declaration header. Report the error and switch the encoding
1221 * to ISO-Latin-1 (if you don't like this policy, just declare the
1222 * encoding !)
1223 */
1224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1225 ctxt->sax->error(ctxt->userData,
1226 "Input is not proper UTF-8, indicate encoding !\n");
1227 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1228 ctxt->input->cur[0], ctxt->input->cur[1],
1229 ctxt->input->cur[2], ctxt->input->cur[3]);
1230 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001231 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001232 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1233
1234 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1235 ctxt->input->cur++;
1236 return;
1237}
1238
1239/**
1240 * xmlCurrentChar:
1241 * @ctxt: the XML parser context
1242 * @len: pointer to the length of the char read
1243 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001244 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001245 * bytes in the input buffer. Implement the end of line normalization:
1246 * 2.11 End-of-Line Handling
1247 * Wherever an external parsed entity or the literal entity value
1248 * of an internal parsed entity contains either the literal two-character
1249 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1250 * must pass to the application the single character #xA.
1251 * This behavior can conveniently be produced by normalizing all
1252 * line breaks to #xA on input, before parsing.)
1253 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001254 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001255 */
1256
1257int
1258xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1259 if (ctxt->instate == XML_PARSER_EOF)
1260 return(0);
1261
1262 if (ctxt->token != 0) {
1263 *len = 0;
1264 return(ctxt->token);
1265 }
1266 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1267 *len = 1;
1268 return((int) *ctxt->input->cur);
1269 }
1270 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1271 /*
1272 * We are supposed to handle UTF8, check it's valid
1273 * From rfc2044: encoding of the Unicode values on UTF-8:
1274 *
1275 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1276 * 0000 0000-0000 007F 0xxxxxxx
1277 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1278 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1279 *
1280 * Check for the 0x110000 limit too
1281 */
1282 const unsigned char *cur = ctxt->input->cur;
1283 unsigned char c;
1284 unsigned int val;
1285
1286 c = *cur;
1287 if (c & 0x80) {
1288 if (cur[1] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if ((cur[1] & 0xc0) != 0x80)
1291 goto encoding_error;
1292 if ((c & 0xe0) == 0xe0) {
1293
1294 if (cur[2] == 0)
1295 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1296 if ((cur[2] & 0xc0) != 0x80)
1297 goto encoding_error;
1298 if ((c & 0xf0) == 0xf0) {
1299 if (cur[3] == 0)
1300 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1301 if (((c & 0xf8) != 0xf0) ||
1302 ((cur[3] & 0xc0) != 0x80))
1303 goto encoding_error;
1304 /* 4-byte code */
1305 *len = 4;
1306 val = (cur[0] & 0x7) << 18;
1307 val |= (cur[1] & 0x3f) << 12;
1308 val |= (cur[2] & 0x3f) << 6;
1309 val |= cur[3] & 0x3f;
1310 } else {
1311 /* 3-byte code */
1312 *len = 3;
1313 val = (cur[0] & 0xf) << 12;
1314 val |= (cur[1] & 0x3f) << 6;
1315 val |= cur[2] & 0x3f;
1316 }
1317 } else {
1318 /* 2-byte code */
1319 *len = 2;
1320 val = (cur[0] & 0x1f) << 6;
1321 val |= cur[1] & 0x3f;
1322 }
1323 if (!IS_CHAR(val)) {
1324 if ((ctxt->sax != NULL) &&
1325 (ctxt->sax->error != NULL))
1326 ctxt->sax->error(ctxt->userData,
1327 "Char 0x%X out of allowed range\n", val);
1328 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1329 ctxt->wellFormed = 0;
1330 ctxt->disableSAX = 1;
1331 }
1332 return(val);
1333 } else {
1334 /* 1-byte code */
1335 *len = 1;
1336 if (*ctxt->input->cur == 0xD) {
1337 if (ctxt->input->cur[1] == 0xA) {
1338 ctxt->nbChars++;
1339 ctxt->input->cur++;
1340 }
1341 return(0xA);
1342 }
1343 return((int) *ctxt->input->cur);
1344 }
1345 }
1346 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001347 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001348 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001349 * XML constructs only use < 128 chars
1350 */
1351 *len = 1;
1352 if (*ctxt->input->cur == 0xD) {
1353 if (ctxt->input->cur[1] == 0xA) {
1354 ctxt->nbChars++;
1355 ctxt->input->cur++;
1356 }
1357 return(0xA);
1358 }
1359 return((int) *ctxt->input->cur);
1360encoding_error:
1361 /*
1362 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001363 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001364 * declaration header. Report the error and switch the encoding
1365 * to ISO-Latin-1 (if you don't like this policy, just declare the
1366 * encoding !)
1367 */
1368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1369 ctxt->sax->error(ctxt->userData,
1370 "Input is not proper UTF-8, indicate encoding !\n");
1371 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1372 ctxt->input->cur[0], ctxt->input->cur[1],
1373 ctxt->input->cur[2], ctxt->input->cur[3]);
1374 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001375 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001376 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1377
1378 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1379 *len = 1;
1380 return((int) *ctxt->input->cur);
1381}
1382
1383/**
1384 * xmlStringCurrentChar:
1385 * @ctxt: the XML parser context
1386 * @cur: pointer to the beginning of the char
1387 * @len: pointer to the length of the char read
1388 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001389 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001390 * bytes in the input buffer.
1391 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001392 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001393 */
1394
1395int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001396xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1397{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001398 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001399 /*
1400 * We are supposed to handle UTF8, check it's valid
1401 * From rfc2044: encoding of the Unicode values on UTF-8:
1402 *
1403 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1404 * 0000 0000-0000 007F 0xxxxxxx
1405 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1406 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1407 *
1408 * Check for the 0x110000 limit too
1409 */
1410 unsigned char c;
1411 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001412
Daniel Veillardd8224e02002-01-13 15:43:22 +00001413 c = *cur;
1414 if (c & 0x80) {
1415 if ((cur[1] & 0xc0) != 0x80)
1416 goto encoding_error;
1417 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001418
Daniel Veillardd8224e02002-01-13 15:43:22 +00001419 if ((cur[2] & 0xc0) != 0x80)
1420 goto encoding_error;
1421 if ((c & 0xf0) == 0xf0) {
1422 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1423 goto encoding_error;
1424 /* 4-byte code */
1425 *len = 4;
1426 val = (cur[0] & 0x7) << 18;
1427 val |= (cur[1] & 0x3f) << 12;
1428 val |= (cur[2] & 0x3f) << 6;
1429 val |= cur[3] & 0x3f;
1430 } else {
1431 /* 3-byte code */
1432 *len = 3;
1433 val = (cur[0] & 0xf) << 12;
1434 val |= (cur[1] & 0x3f) << 6;
1435 val |= cur[2] & 0x3f;
1436 }
1437 } else {
1438 /* 2-byte code */
1439 *len = 2;
1440 val = (cur[0] & 0x1f) << 6;
1441 val |= cur[1] & 0x3f;
1442 }
1443 if (!IS_CHAR(val)) {
1444 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1445 (ctxt->sax->error != NULL))
1446 ctxt->sax->error(ctxt->userData,
1447 "Char 0x%X out of allowed range\n",
1448 val);
1449 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1450 ctxt->wellFormed = 0;
1451 ctxt->disableSAX = 1;
1452 }
1453 return (val);
1454 } else {
1455 /* 1-byte code */
1456 *len = 1;
1457 return ((int) *cur);
1458 }
Owen Taylor3473f882001-02-23 17:55:21 +00001459 }
1460 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001461 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001463 * XML constructs only use < 128 chars
1464 */
1465 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001466 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001467encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001468
Owen Taylor3473f882001-02-23 17:55:21 +00001469 /*
1470 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001471 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001472 * declaration header. Report the error and switch the encoding
1473 * to ISO-Latin-1 (if you don't like this policy, just declare the
1474 * encoding !)
1475 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001476 if (ctxt != NULL) {
1477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1478 ctxt->sax->error(ctxt->userData,
1479 "Input is not proper UTF-8, indicate encoding !\n");
1480 ctxt->sax->error(ctxt->userData,
1481 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1482 ctxt->input->cur[0], ctxt->input->cur[1],
1483 ctxt->input->cur[2], ctxt->input->cur[3]);
1484 }
1485 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001486 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001487 }
Owen Taylor3473f882001-02-23 17:55:21 +00001488
1489 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001490 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001491}
1492
1493/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001494 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001495 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001496 * @val: the char value
1497 *
1498 * append the char value in the array
1499 *
1500 * Returns the number of xmlChar written
1501 */
Owen Taylor3473f882001-02-23 17:55:21 +00001502int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001503xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001504 /*
1505 * We are supposed to handle UTF8, check it's valid
1506 * From rfc2044: encoding of the Unicode values on UTF-8:
1507 *
1508 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1509 * 0000 0000-0000 007F 0xxxxxxx
1510 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1511 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1512 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001513 if (val >= 0x80) {
1514 xmlChar *savedout = out;
1515 int bits;
1516 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1517 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1518 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1519 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001520 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001521 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001522 val);
1523 return(0);
1524 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001525 for ( ; bits >= 0; bits-= 6)
1526 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1527 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001528 }
1529 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001530 return 1;
1531}
1532
1533/**
1534 * xmlCopyChar:
1535 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001536 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001537 * @val: the char value
1538 *
1539 * append the char value in the array
1540 *
1541 * Returns the number of xmlChar written
1542 */
1543
1544int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001545xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001546 /* the len parameter is ignored */
1547 if (val >= 0x80) {
1548 return(xmlCopyCharMultiByte (out, val));
1549 }
1550 *out = (xmlChar) val;
1551 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001552}
1553
1554/************************************************************************
1555 * *
1556 * Commodity functions to switch encodings *
1557 * *
1558 ************************************************************************/
1559
1560/**
1561 * xmlSwitchEncoding:
1562 * @ctxt: the parser context
1563 * @enc: the encoding value (number)
1564 *
1565 * change the input functions when discovering the character encoding
1566 * of a given entity.
1567 *
1568 * Returns 0 in case of success, -1 otherwise
1569 */
1570int
1571xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1572{
1573 xmlCharEncodingHandlerPtr handler;
1574
1575 switch (enc) {
1576 case XML_CHAR_ENCODING_ERROR:
1577 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1579 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1580 ctxt->wellFormed = 0;
1581 ctxt->disableSAX = 1;
1582 break;
1583 case XML_CHAR_ENCODING_NONE:
1584 /* let's assume it's UTF-8 without the XML decl */
1585 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1586 return(0);
1587 case XML_CHAR_ENCODING_UTF8:
1588 /* default encoding, no conversion should be needed */
1589 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001590
1591 /*
1592 * Errata on XML-1.0 June 20 2001
1593 * Specific handling of the Byte Order Mark for
1594 * UTF-8
1595 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001596 if ((ctxt->input != NULL) &&
1597 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001598 (ctxt->input->cur[1] == 0xBB) &&
1599 (ctxt->input->cur[2] == 0xBF)) {
1600 ctxt->input->cur += 3;
1601 }
Owen Taylor3473f882001-02-23 17:55:21 +00001602 return(0);
1603 default:
1604 break;
1605 }
1606 handler = xmlGetCharEncodingHandler(enc);
1607 if (handler == NULL) {
1608 /*
1609 * Default handlers.
1610 */
1611 switch (enc) {
1612 case XML_CHAR_ENCODING_ERROR:
1613 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1616 ctxt->wellFormed = 0;
1617 ctxt->disableSAX = 1;
1618 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1619 break;
1620 case XML_CHAR_ENCODING_NONE:
1621 /* let's assume it's UTF-8 without the XML decl */
1622 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1623 return(0);
1624 case XML_CHAR_ENCODING_UTF8:
1625 case XML_CHAR_ENCODING_ASCII:
1626 /* default encoding, no conversion should be needed */
1627 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1628 return(0);
1629 case XML_CHAR_ENCODING_UTF16LE:
1630 break;
1631 case XML_CHAR_ENCODING_UTF16BE:
1632 break;
1633 case XML_CHAR_ENCODING_UCS4LE:
1634 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636 ctxt->sax->error(ctxt->userData,
1637 "char encoding USC4 little endian not supported\n");
1638 break;
1639 case XML_CHAR_ENCODING_UCS4BE:
1640 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642 ctxt->sax->error(ctxt->userData,
1643 "char encoding USC4 big endian not supported\n");
1644 break;
1645 case XML_CHAR_ENCODING_EBCDIC:
1646 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData,
1649 "char encoding EBCDIC not supported\n");
1650 break;
1651 case XML_CHAR_ENCODING_UCS4_2143:
1652 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654 ctxt->sax->error(ctxt->userData,
1655 "char encoding UCS4 2143 not supported\n");
1656 break;
1657 case XML_CHAR_ENCODING_UCS4_3412:
1658 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "char encoding UCS4 3412 not supported\n");
1662 break;
1663 case XML_CHAR_ENCODING_UCS2:
1664 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666 ctxt->sax->error(ctxt->userData,
1667 "char encoding UCS2 not supported\n");
1668 break;
1669 case XML_CHAR_ENCODING_8859_1:
1670 case XML_CHAR_ENCODING_8859_2:
1671 case XML_CHAR_ENCODING_8859_3:
1672 case XML_CHAR_ENCODING_8859_4:
1673 case XML_CHAR_ENCODING_8859_5:
1674 case XML_CHAR_ENCODING_8859_6:
1675 case XML_CHAR_ENCODING_8859_7:
1676 case XML_CHAR_ENCODING_8859_8:
1677 case XML_CHAR_ENCODING_8859_9:
1678 /*
1679 * We used to keep the internal content in the
1680 * document encoding however this turns being unmaintainable
1681 * So xmlGetCharEncodingHandler() will return non-null
1682 * values for this now.
1683 */
1684 if ((ctxt->inputNr == 1) &&
1685 (ctxt->encoding == NULL) &&
1686 (ctxt->input->encoding != NULL)) {
1687 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1688 }
1689 ctxt->charset = enc;
1690 return(0);
1691 case XML_CHAR_ENCODING_2022_JP:
1692 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1694 ctxt->sax->error(ctxt->userData,
1695 "char encoding ISO-2022-JPnot supported\n");
1696 break;
1697 case XML_CHAR_ENCODING_SHIFT_JIS:
1698 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1700 ctxt->sax->error(ctxt->userData,
1701 "char encoding Shift_JIS not supported\n");
1702 break;
1703 case XML_CHAR_ENCODING_EUC_JP:
1704 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706 ctxt->sax->error(ctxt->userData,
1707 "char encoding EUC-JPnot supported\n");
1708 break;
1709 }
1710 }
1711 if (handler == NULL)
1712 return(-1);
1713 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1714 return(xmlSwitchToEncoding(ctxt, handler));
1715}
1716
1717/**
1718 * xmlSwitchToEncoding:
1719 * @ctxt: the parser context
1720 * @handler: the encoding handler
1721 *
1722 * change the input functions when discovering the character encoding
1723 * of a given entity.
1724 *
1725 * Returns 0 in case of success, -1 otherwise
1726 */
1727int
1728xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1729{
1730 int nbchars;
1731
1732 if (handler != NULL) {
1733 if (ctxt->input != NULL) {
1734 if (ctxt->input->buf != NULL) {
1735 if (ctxt->input->buf->encoder != NULL) {
1736 if (ctxt->input->buf->encoder == handler)
1737 return(0);
1738 /*
1739 * Note: this is a bit dangerous, but that's what it
1740 * takes to use nearly compatible signature for different
1741 * encodings.
1742 */
1743 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1744 ctxt->input->buf->encoder = handler;
1745 return(0);
1746 }
1747 ctxt->input->buf->encoder = handler;
1748
1749 /*
1750 * Is there already some content down the pipe to convert ?
1751 */
1752 if ((ctxt->input->buf->buffer != NULL) &&
1753 (ctxt->input->buf->buffer->use > 0)) {
1754 int processed;
1755
1756 /*
1757 * Specific handling of the Byte Order Mark for
1758 * UTF-16
1759 */
1760 if ((handler->name != NULL) &&
1761 (!strcmp(handler->name, "UTF-16LE")) &&
1762 (ctxt->input->cur[0] == 0xFF) &&
1763 (ctxt->input->cur[1] == 0xFE)) {
1764 ctxt->input->cur += 2;
1765 }
1766 if ((handler->name != NULL) &&
1767 (!strcmp(handler->name, "UTF-16BE")) &&
1768 (ctxt->input->cur[0] == 0xFE) &&
1769 (ctxt->input->cur[1] == 0xFF)) {
1770 ctxt->input->cur += 2;
1771 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001772 /*
1773 * Errata on XML-1.0 June 20 2001
1774 * Specific handling of the Byte Order Mark for
1775 * UTF-8
1776 */
1777 if ((handler->name != NULL) &&
1778 (!strcmp(handler->name, "UTF-8")) &&
1779 (ctxt->input->cur[0] == 0xEF) &&
1780 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001781 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001782 ctxt->input->cur += 3;
1783 }
Owen Taylor3473f882001-02-23 17:55:21 +00001784
1785 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001786 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001787 * Move it as the raw buffer and create a new input buffer
1788 */
1789 processed = ctxt->input->cur - ctxt->input->base;
1790 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1791 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1792 ctxt->input->buf->buffer = xmlBufferCreate();
1793
1794 if (ctxt->html) {
1795 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001796 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001797 */
1798 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1799 ctxt->input->buf->buffer,
1800 ctxt->input->buf->raw);
1801 } else {
1802 /*
1803 * convert just enough to get
1804 * '<?xml version="1.0" encoding="xxx"?>'
1805 * parsed with the autodetected encoding
1806 * into the parser reading buffer.
1807 */
1808 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1809 ctxt->input->buf->buffer,
1810 ctxt->input->buf->raw);
1811 }
1812 if (nbchars < 0) {
1813 xmlGenericError(xmlGenericErrorContext,
1814 "xmlSwitchToEncoding: encoder error\n");
1815 return(-1);
1816 }
1817 ctxt->input->base =
1818 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001819 ctxt->input->end =
1820 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001821
1822 }
1823 return(0);
1824 } else {
1825 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1826 /*
1827 * When parsing a static memory array one must know the
1828 * size to be able to convert the buffer.
1829 */
1830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1831 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001832 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001833 return(-1);
1834 } else {
1835 int processed;
1836
1837 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001838 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001839 * Move it as the raw buffer and create a new input buffer
1840 */
1841 processed = ctxt->input->cur - ctxt->input->base;
1842
1843 ctxt->input->buf->raw = xmlBufferCreate();
1844 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1845 ctxt->input->length - processed);
1846 ctxt->input->buf->buffer = xmlBufferCreate();
1847
1848 /*
1849 * convert as much as possible of the raw input
1850 * to the parser reading buffer.
1851 */
1852 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1853 ctxt->input->buf->buffer,
1854 ctxt->input->buf->raw);
1855 if (nbchars < 0) {
1856 xmlGenericError(xmlGenericErrorContext,
1857 "xmlSwitchToEncoding: encoder error\n");
1858 return(-1);
1859 }
1860
1861 /*
1862 * Conversion succeeded, get rid of the old buffer
1863 */
1864 if ((ctxt->input->free != NULL) &&
1865 (ctxt->input->base != NULL))
1866 ctxt->input->free((xmlChar *) ctxt->input->base);
1867 ctxt->input->base =
1868 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001869 ctxt->input->end =
1870 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001871 }
1872 }
1873 } else {
1874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1875 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001876 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001877 return(-1);
1878 }
1879 /*
1880 * The parsing is now done in UTF8 natively
1881 */
1882 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1883 } else
1884 return(-1);
1885 return(0);
1886
1887}
1888
1889/************************************************************************
1890 * *
1891 * Commodity functions to handle entities processing *
1892 * *
1893 ************************************************************************/
1894
1895/**
1896 * xmlFreeInputStream:
1897 * @input: an xmlParserInputPtr
1898 *
1899 * Free up an input stream.
1900 */
1901void
1902xmlFreeInputStream(xmlParserInputPtr input) {
1903 if (input == NULL) return;
1904
1905 if (input->filename != NULL) xmlFree((char *) input->filename);
1906 if (input->directory != NULL) xmlFree((char *) input->directory);
1907 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1908 if (input->version != NULL) xmlFree((char *) input->version);
1909 if ((input->free != NULL) && (input->base != NULL))
1910 input->free((xmlChar *) input->base);
1911 if (input->buf != NULL)
1912 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001913 xmlFree(input);
1914}
1915
1916/**
1917 * xmlNewInputStream:
1918 * @ctxt: an XML parser context
1919 *
1920 * Create a new input stream structure
1921 * Returns the new input stream or NULL
1922 */
1923xmlParserInputPtr
1924xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1925 xmlParserInputPtr input;
1926
1927 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1928 if (input == NULL) {
1929 if (ctxt != NULL) {
1930 ctxt->errNo = XML_ERR_NO_MEMORY;
1931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1932 ctxt->sax->error(ctxt->userData,
1933 "malloc: couldn't allocate a new input stream\n");
1934 ctxt->errNo = XML_ERR_NO_MEMORY;
1935 }
1936 return(NULL);
1937 }
1938 memset(input, 0, sizeof(xmlParserInput));
1939 input->line = 1;
1940 input->col = 1;
1941 input->standalone = -1;
1942 return(input);
1943}
1944
1945/**
1946 * xmlNewIOInputStream:
1947 * @ctxt: an XML parser context
1948 * @input: an I/O Input
1949 * @enc: the charset encoding if known
1950 *
1951 * Create a new input stream structure encapsulating the @input into
1952 * a stream suitable for the parser.
1953 *
1954 * Returns the new input stream or NULL
1955 */
1956xmlParserInputPtr
1957xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1958 xmlCharEncoding enc) {
1959 xmlParserInputPtr inputStream;
1960
1961 if (xmlParserDebugEntities)
1962 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1963 inputStream = xmlNewInputStream(ctxt);
1964 if (inputStream == NULL) {
1965 return(NULL);
1966 }
1967 inputStream->filename = NULL;
1968 inputStream->buf = input;
1969 inputStream->base = inputStream->buf->buffer->content;
1970 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001971 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001972 if (enc != XML_CHAR_ENCODING_NONE) {
1973 xmlSwitchEncoding(ctxt, enc);
1974 }
1975
1976 return(inputStream);
1977}
1978
1979/**
1980 * xmlNewEntityInputStream:
1981 * @ctxt: an XML parser context
1982 * @entity: an Entity pointer
1983 *
1984 * Create a new input stream based on an xmlEntityPtr
1985 *
1986 * Returns the new input stream or NULL
1987 */
1988xmlParserInputPtr
1989xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1990 xmlParserInputPtr input;
1991
1992 if (entity == NULL) {
1993 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1995 ctxt->sax->error(ctxt->userData,
1996 "internal: xmlNewEntityInputStream entity = NULL\n");
1997 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1998 return(NULL);
1999 }
2000 if (xmlParserDebugEntities)
2001 xmlGenericError(xmlGenericErrorContext,
2002 "new input from entity: %s\n", entity->name);
2003 if (entity->content == NULL) {
2004 switch (entity->etype) {
2005 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2006 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2008 ctxt->sax->error(ctxt->userData,
2009 "xmlNewEntityInputStream unparsed entity !\n");
2010 break;
2011 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2012 case XML_EXTERNAL_PARAMETER_ENTITY:
2013 return(xmlLoadExternalEntity((char *) entity->URI,
2014 (char *) entity->ExternalID, ctxt));
2015 case XML_INTERNAL_GENERAL_ENTITY:
2016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2017 ctxt->sax->error(ctxt->userData,
2018 "Internal entity %s without content !\n", entity->name);
2019 break;
2020 case XML_INTERNAL_PARAMETER_ENTITY:
2021 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData,
2024 "Internal parameter entity %s without content !\n", entity->name);
2025 break;
2026 case XML_INTERNAL_PREDEFINED_ENTITY:
2027 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029 ctxt->sax->error(ctxt->userData,
2030 "Predefined entity %s without content !\n", entity->name);
2031 break;
2032 }
2033 return(NULL);
2034 }
2035 input = xmlNewInputStream(ctxt);
2036 if (input == NULL) {
2037 return(NULL);
2038 }
2039 input->filename = (char *) entity->URI;
2040 input->base = entity->content;
2041 input->cur = entity->content;
2042 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002043 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002044 return(input);
2045}
2046
2047/**
2048 * xmlNewStringInputStream:
2049 * @ctxt: an XML parser context
2050 * @buffer: an memory buffer
2051 *
2052 * Create a new input stream based on a memory buffer.
2053 * Returns the new input stream
2054 */
2055xmlParserInputPtr
2056xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2057 xmlParserInputPtr input;
2058
2059 if (buffer == NULL) {
2060 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2062 ctxt->sax->error(ctxt->userData,
2063 "internal: xmlNewStringInputStream string = NULL\n");
2064 return(NULL);
2065 }
2066 if (xmlParserDebugEntities)
2067 xmlGenericError(xmlGenericErrorContext,
2068 "new fixed input: %.30s\n", buffer);
2069 input = xmlNewInputStream(ctxt);
2070 if (input == NULL) {
2071 return(NULL);
2072 }
2073 input->base = buffer;
2074 input->cur = buffer;
2075 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002076 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002077 return(input);
2078}
2079
2080/**
2081 * xmlNewInputFromFile:
2082 * @ctxt: an XML parser context
2083 * @filename: the filename to use as entity
2084 *
2085 * Create a new input stream based on a file.
2086 *
2087 * Returns the new input stream or NULL in case of error
2088 */
2089xmlParserInputPtr
2090xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2091 xmlParserInputBufferPtr buf;
2092 xmlParserInputPtr inputStream;
2093 char *directory = NULL;
2094 xmlChar *URI = NULL;
2095
2096 if (xmlParserDebugEntities)
2097 xmlGenericError(xmlGenericErrorContext,
2098 "new input from file: %s\n", filename);
2099 if (ctxt == NULL) return(NULL);
2100 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2101 if (buf == NULL)
2102 return(NULL);
2103
2104 URI = xmlStrdup((xmlChar *) filename);
2105 directory = xmlParserGetDirectory((const char *) URI);
2106
2107 inputStream = xmlNewInputStream(ctxt);
2108 if (inputStream == NULL) {
2109 if (directory != NULL) xmlFree((char *) directory);
2110 if (URI != NULL) xmlFree((char *) URI);
2111 return(NULL);
2112 }
2113
2114 inputStream->filename = (const char *) URI;
2115 inputStream->directory = directory;
2116 inputStream->buf = buf;
2117
2118 inputStream->base = inputStream->buf->buffer->content;
2119 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002120 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002121 if ((ctxt->directory == NULL) && (directory != NULL))
2122 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2123 return(inputStream);
2124}
2125
2126/************************************************************************
2127 * *
2128 * Commodity functions to handle parser contexts *
2129 * *
2130 ************************************************************************/
2131
2132/**
2133 * xmlInitParserCtxt:
2134 * @ctxt: an XML parser context
2135 *
2136 * Initialize a parser context
2137 */
2138
2139void
2140xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2141{
2142 xmlSAXHandler *sax;
2143
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002144 if(ctxt==NULL) {
2145 xmlGenericError(xmlGenericErrorContext,
2146 "xmlInitParserCtxt: NULL context given\n");
2147 return;
2148 }
2149
Owen Taylor3473f882001-02-23 17:55:21 +00002150 xmlDefaultSAXHandlerInit();
2151
2152 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2153 if (sax == NULL) {
2154 xmlGenericError(xmlGenericErrorContext,
2155 "xmlInitParserCtxt: out of memory\n");
2156 }
2157 else
2158 memset(sax, 0, sizeof(xmlSAXHandler));
2159
2160 /* Allocate the Input stack */
2161 ctxt->inputTab = (xmlParserInputPtr *)
2162 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2163 if (ctxt->inputTab == NULL) {
2164 xmlGenericError(xmlGenericErrorContext,
2165 "xmlInitParserCtxt: out of memory\n");
2166 ctxt->inputNr = 0;
2167 ctxt->inputMax = 0;
2168 ctxt->input = NULL;
2169 return;
2170 }
2171 ctxt->inputNr = 0;
2172 ctxt->inputMax = 5;
2173 ctxt->input = NULL;
2174
2175 ctxt->version = NULL;
2176 ctxt->encoding = NULL;
2177 ctxt->standalone = -1;
2178 ctxt->hasExternalSubset = 0;
2179 ctxt->hasPErefs = 0;
2180 ctxt->html = 0;
2181 ctxt->external = 0;
2182 ctxt->instate = XML_PARSER_START;
2183 ctxt->token = 0;
2184 ctxt->directory = NULL;
2185
2186 /* Allocate the Node stack */
2187 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2188 if (ctxt->nodeTab == NULL) {
2189 xmlGenericError(xmlGenericErrorContext,
2190 "xmlInitParserCtxt: out of memory\n");
2191 ctxt->nodeNr = 0;
2192 ctxt->nodeMax = 0;
2193 ctxt->node = NULL;
2194 ctxt->inputNr = 0;
2195 ctxt->inputMax = 0;
2196 ctxt->input = NULL;
2197 return;
2198 }
2199 ctxt->nodeNr = 0;
2200 ctxt->nodeMax = 10;
2201 ctxt->node = NULL;
2202
2203 /* Allocate the Name stack */
2204 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2205 if (ctxt->nameTab == NULL) {
2206 xmlGenericError(xmlGenericErrorContext,
2207 "xmlInitParserCtxt: out of memory\n");
2208 ctxt->nodeNr = 0;
2209 ctxt->nodeMax = 0;
2210 ctxt->node = NULL;
2211 ctxt->inputNr = 0;
2212 ctxt->inputMax = 0;
2213 ctxt->input = NULL;
2214 ctxt->nameNr = 0;
2215 ctxt->nameMax = 0;
2216 ctxt->name = NULL;
2217 return;
2218 }
2219 ctxt->nameNr = 0;
2220 ctxt->nameMax = 10;
2221 ctxt->name = NULL;
2222
2223 /* Allocate the space stack */
2224 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2225 if (ctxt->spaceTab == NULL) {
2226 xmlGenericError(xmlGenericErrorContext,
2227 "xmlInitParserCtxt: out of memory\n");
2228 ctxt->nodeNr = 0;
2229 ctxt->nodeMax = 0;
2230 ctxt->node = NULL;
2231 ctxt->inputNr = 0;
2232 ctxt->inputMax = 0;
2233 ctxt->input = NULL;
2234 ctxt->nameNr = 0;
2235 ctxt->nameMax = 0;
2236 ctxt->name = NULL;
2237 ctxt->spaceNr = 0;
2238 ctxt->spaceMax = 0;
2239 ctxt->space = NULL;
2240 return;
2241 }
2242 ctxt->spaceNr = 1;
2243 ctxt->spaceMax = 10;
2244 ctxt->spaceTab[0] = -1;
2245 ctxt->space = &ctxt->spaceTab[0];
2246
Daniel Veillard14be0a12001-03-03 18:50:55 +00002247 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002248 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002249
Owen Taylor3473f882001-02-23 17:55:21 +00002250 ctxt->userData = ctxt;
2251 ctxt->myDoc = NULL;
2252 ctxt->wellFormed = 1;
2253 ctxt->valid = 1;
2254 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2255 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2256 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002257 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002258 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002259 if (ctxt->keepBlanks == 0)
2260 sax->ignorableWhitespace = ignorableWhitespace;
2261
Owen Taylor3473f882001-02-23 17:55:21 +00002262 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002263 ctxt->vctxt.error = xmlParserValidityError;
2264 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002266 if (xmlGetWarningsDefaultValue == 0)
2267 ctxt->vctxt.warning = NULL;
2268 else
2269 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002270 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002271 }
2272 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2273 ctxt->record_info = 0;
2274 ctxt->nbChars = 0;
2275 ctxt->checkIndex = 0;
2276 ctxt->inSubset = 0;
2277 ctxt->errNo = XML_ERR_OK;
2278 ctxt->depth = 0;
2279 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002280 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002281 xmlInitNodeInfoSeq(&ctxt->node_seq);
2282}
2283
2284/**
2285 * xmlFreeParserCtxt:
2286 * @ctxt: an XML parser context
2287 *
2288 * Free all the memory used by a parser context. However the parsed
2289 * document in ctxt->myDoc is not freed.
2290 */
2291
2292void
2293xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2294{
2295 xmlParserInputPtr input;
2296 xmlChar *oldname;
2297
2298 if (ctxt == NULL) return;
2299
2300 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2301 xmlFreeInputStream(input);
2302 }
2303 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2304 xmlFree(oldname);
2305 }
2306 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2307 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2308 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2309 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2310 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2311 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2312 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2313 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2314 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002315 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2316 xmlFree(ctxt->sax);
2317 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002318 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002319#ifdef LIBXML_CATALOG_ENABLED
2320 if (ctxt->catalogs != NULL)
2321 xmlCatalogFreeLocal(ctxt->catalogs);
2322#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002323 xmlFree(ctxt);
2324}
2325
2326/**
2327 * xmlNewParserCtxt:
2328 *
2329 * Allocate and initialize a new parser context.
2330 *
2331 * Returns the xmlParserCtxtPtr or NULL
2332 */
2333
2334xmlParserCtxtPtr
2335xmlNewParserCtxt()
2336{
2337 xmlParserCtxtPtr ctxt;
2338
2339 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2340 if (ctxt == NULL) {
2341 xmlGenericError(xmlGenericErrorContext,
2342 "xmlNewParserCtxt : cannot allocate context\n");
2343 perror("malloc");
2344 return(NULL);
2345 }
2346 memset(ctxt, 0, sizeof(xmlParserCtxt));
2347 xmlInitParserCtxt(ctxt);
2348 return(ctxt);
2349}
2350
2351/************************************************************************
2352 * *
 *		Handling of node information				*
2354 * *
2355 ************************************************************************/
2356
2357/**
2358 * xmlClearParserCtxt:
2359 * @ctxt: an XML parser context
2360 *
2361 * Clear (release owned resources) and reinitialize a parser context
2362 */
2363
2364void
2365xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2366{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002367 if (ctxt==NULL)
2368 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002369 xmlClearNodeInfoSeq(&ctxt->node_seq);
2370 xmlInitParserCtxt(ctxt);
2371}
2372
2373/**
2374 * xmlParserFindNodeInfo:
2375 * @ctxt: an XML parser context
2376 * @node: an XML node within the tree
2377 *
2378 * Find the parser node info struct for a given node
2379 *
2380 * Returns an xmlParserNodeInfo block pointer or NULL
2381 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002382const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2383 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002384{
2385 unsigned long pos;
2386
2387 /* Find position where node should be at */
2388 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002389 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002390 return &ctx->node_seq.buffer[pos];
2391 else
2392 return NULL;
2393}
2394
2395
2396/**
2397 * xmlInitNodeInfoSeq:
2398 * @seq: a node info sequence pointer
2399 *
2400 * -- Initialize (set to initial state) node info sequence
2401 */
2402void
2403xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2404{
2405 seq->length = 0;
2406 seq->maximum = 0;
2407 seq->buffer = NULL;
2408}
2409
2410/**
2411 * xmlClearNodeInfoSeq:
2412 * @seq: a node info sequence pointer
2413 *
2414 * -- Clear (release memory and reinitialize) node
2415 * info sequence
2416 */
2417void
2418xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2419{
2420 if ( seq->buffer != NULL )
2421 xmlFree(seq->buffer);
2422 xmlInitNodeInfoSeq(seq);
2423}
2424
2425
2426/**
2427 * xmlParserFindNodeInfoIndex:
2428 * @seq: a node info sequence pointer
2429 * @node: an XML node pointer
2430 *
2431 *
2432 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2433 * the given node is or should be at in a sorted sequence
2434 *
2435 * Returns a long indicating the position of the record
2436 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002437unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2438 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002439{
2440 unsigned long upper, lower, middle;
2441 int found = 0;
2442
2443 /* Do a binary search for the key */
2444 lower = 1;
2445 upper = seq->length;
2446 middle = 0;
2447 while ( lower <= upper && !found) {
2448 middle = lower + (upper - lower) / 2;
2449 if ( node == seq->buffer[middle - 1].node )
2450 found = 1;
2451 else if ( node < seq->buffer[middle - 1].node )
2452 upper = middle - 1;
2453 else
2454 lower = middle + 1;
2455 }
2456
2457 /* Return position */
2458 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2459 return middle;
2460 else
2461 return middle - 1;
2462}
2463
2464
2465/**
2466 * xmlParserAddNodeInfo:
2467 * @ctxt: an XML parser context
2468 * @info: a node info sequence pointer
2469 *
2470 * Insert node info record into the sorted sequence
2471 */
2472void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002473xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002474 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002475{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002476 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002477
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002478 /* Find pos and check to see if node is already in the sequence */
2479 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2480 info->node);
2481 if (pos < ctxt->node_seq.length
2482 && ctxt->node_seq.buffer[pos].node == info->node) {
2483 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002484 }
2485
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002486 /* Otherwise, we need to add new node to buffer */
2487 else {
2488 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2489 xmlParserNodeInfo *tmp_buffer;
2490 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002491
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002492 if (ctxt->node_seq.maximum == 0)
2493 ctxt->node_seq.maximum = 2;
2494 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2495 (2 * ctxt->node_seq.maximum));
2496
2497 if (ctxt->node_seq.buffer == NULL)
2498 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2499 else
2500 tmp_buffer =
2501 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2502 byte_size);
2503
2504 if (tmp_buffer == NULL) {
2505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2506 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2507 ctxt->errNo = XML_ERR_NO_MEMORY;
2508 return;
2509 }
2510 ctxt->node_seq.buffer = tmp_buffer;
2511 ctxt->node_seq.maximum *= 2;
2512 }
2513
2514 /* If position is not at end, move elements out of the way */
2515 if (pos != ctxt->node_seq.length) {
2516 unsigned long i;
2517
2518 for (i = ctxt->node_seq.length; i > pos; i--)
2519 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2520 }
2521
2522 /* Copy element and increase length */
2523 ctxt->node_seq.buffer[pos] = *info;
2524 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002525 }
Owen Taylor3473f882001-02-23 17:55:21 +00002526}
2527
2528/************************************************************************
2529 * *
 *		Default settings					*
2531 * *
2532 ************************************************************************/
2533/**
2534 * xmlPedanticParserDefault:
2535 * @val: int 0 or 1
2536 *
2537 * Set and return the previous value for enabling pedantic warnings.
2538 *
2539 * Returns the last value for 0 for no substitution, 1 for substitution.
2540 */
2541
2542int
2543xmlPedanticParserDefault(int val) {
2544 int old = xmlPedanticParserDefaultValue;
2545
2546 xmlPedanticParserDefaultValue = val;
2547 return(old);
2548}
2549
2550/**
2551 * xmlLineNumbersDefault:
2552 * @val: int 0 or 1
2553 *
2554 * Set and return the previous value for enabling line numbers in elements
2555 * contents. This may break on old application and is turned off by default.
2556 *
2557 * Returns the last value for 0 for no substitution, 1 for substitution.
2558 */
2559
2560int
2561xmlLineNumbersDefault(int val) {
2562 int old = xmlLineNumbersDefaultValue;
2563
2564 xmlLineNumbersDefaultValue = val;
2565 return(old);
2566}
2567
2568/**
2569 * xmlSubstituteEntitiesDefault:
2570 * @val: int 0 or 1
2571 *
2572 * Set and return the previous value for default entity support.
2573 * Initially the parser always keep entity references instead of substituting
2574 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002575 * default parser behavior
2576 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002577 * file basis.
2578 *
2579 * Returns the last value for 0 for no substitution, 1 for substitution.
2580 */
2581
2582int
2583xmlSubstituteEntitiesDefault(int val) {
2584 int old = xmlSubstituteEntitiesDefaultValue;
2585
2586 xmlSubstituteEntitiesDefaultValue = val;
2587 return(old);
2588}
2589
2590/**
2591 * xmlKeepBlanksDefault:
2592 * @val: int 0 or 1
2593 *
2594 * Set and return the previous value for default blanks text nodes support.
2595 * The 1.x version of the parser used an heuristic to try to detect
2596 * ignorable white spaces. As a result the SAX callback was generating
2597 * ignorableWhitespace() callbacks instead of characters() one, and when
2598 * using the DOM output text nodes containing those blanks were not generated.
2599 * The 2.x and later version will switch to the XML standard way and
2600 * ignorableWhitespace() are only generated when running the parser in
2601 * validating mode and when the current element doesn't allow CDATA or
2602 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002603 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002604 * on 1.X libs and to switch back to the old mode for compatibility when
2605 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2606 * by using xmlIsBlankNode() commodity function to detect the "empty"
2607 * nodes generated.
2608 * This value also affect autogeneration of indentation when saving code
2609 * if blanks sections are kept, indentation is not generated.
2610 *
2611 * Returns the last value for 0 for no substitution, 1 for substitution.
2612 */
2613
2614int
2615xmlKeepBlanksDefault(int val) {
2616 int old = xmlKeepBlanksDefaultValue;
2617
2618 xmlKeepBlanksDefaultValue = val;
2619 xmlIndentTreeOutput = !val;
2620 return(old);
2621}
2622
2623/************************************************************************
2624 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002625 * Deprecated functions kept for compatibility *
2626 * *
2627 ************************************************************************/
2628
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002629/**
2630 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002631 * @lang: pointer to the string value
2632 *
2633 * Checks that the value conforms to the LanguageID production:
2634 *
2635 * NOTE: this is somewhat deprecated, those productions were removed from
2636 * the XML Second edition.
2637 *
2638 * [33] LanguageID ::= Langcode ('-' Subcode)*
2639 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2640 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2641 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2642 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2643 * [38] Subcode ::= ([a-z] | [A-Z])+
2644 *
2645 * Returns 1 if correct 0 otherwise
2646 **/
2647int
2648xmlCheckLanguageID(const xmlChar *lang) {
2649 const xmlChar *cur = lang;
2650
2651 if (cur == NULL)
2652 return(0);
2653 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2654 ((cur[0] == 'I') && (cur[1] == '-'))) {
2655 /*
2656 * IANA code
2657 */
2658 cur += 2;
2659 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2660 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2661 cur++;
2662 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2663 ((cur[0] == 'X') && (cur[1] == '-'))) {
2664 /*
2665 * User code
2666 */
2667 cur += 2;
2668 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2669 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2670 cur++;
2671 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2672 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2673 /*
2674 * ISO639
2675 */
2676 cur++;
2677 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2678 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2679 cur++;
2680 else
2681 return(0);
2682 } else
2683 return(0);
2684 while (cur[0] != 0) { /* non input consuming */
2685 if (cur[0] != '-')
2686 return(0);
2687 cur++;
2688 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2689 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2690 cur++;
2691 else
2692 return(0);
2693 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2694 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2695 cur++;
2696 }
2697 return(1);
2698}
2699
2700/**
2701 * xmlDecodeEntities:
2702 * @ctxt: the parser context
2703 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2704 * @len: the len to decode (in bytes !), -1 for no size limit
2705 * @end: an end marker xmlChar, 0 if none
2706 * @end2: an end marker xmlChar, 0 if none
2707 * @end3: an end marker xmlChar, 0 if none
2708 *
2709 * This function is deprecated, we now always process entities content
2710 * through xmlStringDecodeEntities
2711 *
2712 * TODO: remove it in next major release.
2713 *
2714 * [67] Reference ::= EntityRef | CharRef
2715 *
2716 * [69] PEReference ::= '%' Name ';'
2717 *
2718 * Returns A newly allocated string with the substitution done. The caller
2719 * must deallocate it !
2720 */
2721xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002722xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2723 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002724#if 0
2725 xmlChar *buffer = NULL;
2726 unsigned int buffer_size = 0;
2727 unsigned int nbchars = 0;
2728
2729 xmlChar *current = NULL;
2730 xmlEntityPtr ent;
2731 unsigned int max = (unsigned int) len;
2732 int c,l;
2733#endif
2734
2735 static int deprecated = 0;
2736 if (!deprecated) {
2737 xmlGenericError(xmlGenericErrorContext,
2738 "xmlDecodeEntities() deprecated function reached\n");
2739 deprecated = 1;
2740 }
2741
2742#if 0
2743 if (ctxt->depth > 40) {
2744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2745 ctxt->sax->error(ctxt->userData,
2746 "Detected entity reference loop\n");
2747 ctxt->wellFormed = 0;
2748 ctxt->disableSAX = 1;
2749 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2750 return(NULL);
2751 }
2752
2753 /*
2754 * allocate a translation buffer.
2755 */
2756 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2757 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2758 if (buffer == NULL) {
2759 perror("xmlDecodeEntities: malloc failed");
2760 return(NULL);
2761 }
2762
2763 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002764 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002765 */
2766 GROW;
2767 c = CUR_CHAR(l);
2768 while ((nbchars < max) && (c != end) && /* NOTUSED */
2769 (c != end2) && (c != end3)) {
2770 GROW;
2771 if (c == 0) break;
2772 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2773 int val = xmlParseCharRef(ctxt);
2774 COPY_BUF(0,buffer,nbchars,val);
2775 NEXTL(l);
2776 } else if ((c == '&') && (ctxt->token != '&') &&
2777 (what & XML_SUBSTITUTE_REF)) {
2778 if (xmlParserDebugEntities)
2779 xmlGenericError(xmlGenericErrorContext,
2780 "decoding Entity Reference\n");
2781 ent = xmlParseEntityRef(ctxt);
2782 if ((ent != NULL) &&
2783 (ctxt->replaceEntities != 0)) {
2784 current = ent->content;
2785 while (*current != 0) { /* non input consuming loop */
2786 buffer[nbchars++] = *current++;
2787 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2788 growBuffer(buffer);
2789 }
2790 }
2791 } else if (ent != NULL) {
2792 const xmlChar *cur = ent->name;
2793
2794 buffer[nbchars++] = '&';
2795 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2796 growBuffer(buffer);
2797 }
2798 while (*cur != 0) { /* non input consuming loop */
2799 buffer[nbchars++] = *cur++;
2800 }
2801 buffer[nbchars++] = ';';
2802 }
2803 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2804 /*
2805 * a PEReference induce to switch the entity flow,
2806 * we break here to flush the current set of chars
2807 * parsed if any. We will be called back later.
2808 */
2809 if (xmlParserDebugEntities)
2810 xmlGenericError(xmlGenericErrorContext,
2811 "decoding PE Reference\n");
2812 if (nbchars != 0) break;
2813
2814 xmlParsePEReference(ctxt);
2815
2816 /*
2817 * Pop-up of finished entities.
2818 */
2819 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2820 xmlPopInput(ctxt);
2821
2822 break;
2823 } else {
2824 COPY_BUF(l,buffer,nbchars,c);
2825 NEXTL(l);
2826 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2827 growBuffer(buffer);
2828 }
2829 }
2830 c = CUR_CHAR(l);
2831 }
2832 buffer[nbchars++] = 0;
2833 return(buffer);
2834#endif
2835 return(NULL);
2836}
2837
2838/**
2839 * xmlNamespaceParseNCName:
2840 * @ctxt: an XML parser context
2841 *
2842 * parse an XML namespace name.
2843 *
2844 * TODO: this seems not in use anymore, the namespace handling is done on
2845 * top of the SAX interfaces, i.e. not on raw input.
2846 *
2847 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2848 *
2849 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2850 * CombiningChar | Extender
2851 *
2852 * Returns the namespace name or NULL
2853 */
2854
2855xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002856xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002857#if 0
2858 xmlChar buf[XML_MAX_NAMELEN + 5];
2859 int len = 0, l;
2860 int cur = CUR_CHAR(l);
2861#endif
2862
2863 static int deprecated = 0;
2864 if (!deprecated) {
2865 xmlGenericError(xmlGenericErrorContext,
2866 "xmlNamespaceParseNCName() deprecated function reached\n");
2867 deprecated = 1;
2868 }
2869
2870#if 0
2871 /* load first the value of the char !!! */
2872 GROW;
2873 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2874
2875xmlGenericError(xmlGenericErrorContext,
2876 "xmlNamespaceParseNCName: reached loop 3\n");
2877 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2878 (cur == '.') || (cur == '-') ||
2879 (cur == '_') ||
2880 (IS_COMBINING(cur)) ||
2881 (IS_EXTENDER(cur))) {
2882 COPY_BUF(l,buf,len,cur);
2883 NEXTL(l);
2884 cur = CUR_CHAR(l);
2885 if (len >= XML_MAX_NAMELEN) {
2886 xmlGenericError(xmlGenericErrorContext,
2887 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2888 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2889 (cur == '.') || (cur == '-') ||
2890 (cur == '_') ||
2891 (IS_COMBINING(cur)) ||
2892 (IS_EXTENDER(cur))) {
2893 NEXTL(l);
2894 cur = CUR_CHAR(l);
2895 }
2896 break;
2897 }
2898 }
2899 return(xmlStrndup(buf, len));
2900#endif
2901 return(NULL);
2902}
2903
2904/**
2905 * xmlNamespaceParseQName:
2906 * @ctxt: an XML parser context
2907 * @prefix: a xmlChar **
2908 *
2909 * TODO: this seems not in use anymore, the namespace handling is done on
2910 * top of the SAX interfaces, i.e. not on raw input.
2911 *
2912 * parse an XML qualified name
2913 *
2914 * [NS 5] QName ::= (Prefix ':')? LocalPart
2915 *
2916 * [NS 6] Prefix ::= NCName
2917 *
2918 * [NS 7] LocalPart ::= NCName
2919 *
2920 * Returns the local part, and prefix is updated
2921 * to get the Prefix if any.
2922 */
2923
2924xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002925xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002926
2927 static int deprecated = 0;
2928 if (!deprecated) {
2929 xmlGenericError(xmlGenericErrorContext,
2930 "xmlNamespaceParseQName() deprecated function reached\n");
2931 deprecated = 1;
2932 }
2933
2934#if 0
2935 xmlChar *ret = NULL;
2936
2937 *prefix = NULL;
2938 ret = xmlNamespaceParseNCName(ctxt);
2939 if (RAW == ':') {
2940 *prefix = ret;
2941 NEXT;
2942 ret = xmlNamespaceParseNCName(ctxt);
2943 }
2944
2945 return(ret);
2946#endif
2947 return(NULL);
2948}
2949
2950/**
2951 * xmlNamespaceParseNSDef:
2952 * @ctxt: an XML parser context
2953 *
2954 * parse a namespace prefix declaration
2955 *
2956 * TODO: this seems not in use anymore, the namespace handling is done on
2957 * top of the SAX interfaces, i.e. not on raw input.
2958 *
2959 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2960 *
2961 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2962 *
2963 * Returns the namespace name
2964 */
2965
2966xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002967xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002968 static int deprecated = 0;
2969 if (!deprecated) {
2970 xmlGenericError(xmlGenericErrorContext,
2971 "xmlNamespaceParseNSDef() deprecated function reached\n");
2972 deprecated = 1;
2973 }
2974 return(NULL);
2975#if 0
2976 xmlChar *name = NULL;
2977
2978 if ((RAW == 'x') && (NXT(1) == 'm') &&
2979 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2980 (NXT(4) == 's')) {
2981 SKIP(5);
2982 if (RAW == ':') {
2983 NEXT;
2984 name = xmlNamespaceParseNCName(ctxt);
2985 }
2986 }
2987 return(name);
2988#endif
2989}
2990
2991/**
2992 * xmlParseQuotedString:
2993 * @ctxt: an XML parser context
2994 *
2995 * Parse and return a string between quotes or doublequotes
2996 *
2997 * TODO: Deprecated, to be removed at next drop of binary compatibility
2998 *
2999 * Returns the string parser or NULL.
3000 */
3001xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003002xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003003 static int deprecated = 0;
3004 if (!deprecated) {
3005 xmlGenericError(xmlGenericErrorContext,
3006 "xmlParseQuotedString() deprecated function reached\n");
3007 deprecated = 1;
3008 }
3009 return(NULL);
3010
3011#if 0
3012 xmlChar *buf = NULL;
3013 int len = 0,l;
3014 int size = XML_PARSER_BUFFER_SIZE;
3015 int c;
3016
3017 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3018 if (buf == NULL) {
3019 xmlGenericError(xmlGenericErrorContext,
3020 "malloc of %d byte failed\n", size);
3021 return(NULL);
3022 }
3023xmlGenericError(xmlGenericErrorContext,
3024 "xmlParseQuotedString: reached loop 4\n");
3025 if (RAW == '"') {
3026 NEXT;
3027 c = CUR_CHAR(l);
3028 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3029 if (len + 5 >= size) {
3030 size *= 2;
3031 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3032 if (buf == NULL) {
3033 xmlGenericError(xmlGenericErrorContext,
3034 "realloc of %d byte failed\n", size);
3035 return(NULL);
3036 }
3037 }
3038 COPY_BUF(l,buf,len,c);
3039 NEXTL(l);
3040 c = CUR_CHAR(l);
3041 }
3042 if (c != '"') {
3043 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3045 ctxt->sax->error(ctxt->userData,
3046 "String not closed \"%.50s\"\n", buf);
3047 ctxt->wellFormed = 0;
3048 ctxt->disableSAX = 1;
3049 } else {
3050 NEXT;
3051 }
3052 } else if (RAW == '\''){
3053 NEXT;
3054 c = CUR;
3055 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3056 if (len + 1 >= size) {
3057 size *= 2;
3058 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3059 if (buf == NULL) {
3060 xmlGenericError(xmlGenericErrorContext,
3061 "realloc of %d byte failed\n", size);
3062 return(NULL);
3063 }
3064 }
3065 buf[len++] = c;
3066 NEXT;
3067 c = CUR;
3068 }
3069 if (RAW != '\'') {
3070 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3072 ctxt->sax->error(ctxt->userData,
3073 "String not closed \"%.50s\"\n", buf);
3074 ctxt->wellFormed = 0;
3075 ctxt->disableSAX = 1;
3076 } else {
3077 NEXT;
3078 }
3079 }
3080 return(buf);
3081#endif
3082}
3083
3084/**
3085 * xmlParseNamespace:
3086 * @ctxt: an XML parser context
3087 *
3088 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3089 *
3090 * This is what the older xml-name Working Draft specified, a bunch of
3091 * other stuff may still rely on it, so support is still here as
3092 * if it was declared on the root of the Tree:-(
3093 *
3094 * TODO: remove from library
3095 *
3096 * To be removed at next drop of binary compatibility
3097 */
3098
3099void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003100xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003101 static int deprecated = 0;
3102 if (!deprecated) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "xmlParseNamespace() deprecated function reached\n");
3105 deprecated = 1;
3106 }
3107
3108#if 0
3109 xmlChar *href = NULL;
3110 xmlChar *prefix = NULL;
3111 int garbage = 0;
3112
3113 /*
3114 * We just skipped "namespace" or "xml:namespace"
3115 */
3116 SKIP_BLANKS;
3117
3118xmlGenericError(xmlGenericErrorContext,
3119 "xmlParseNamespace: reached loop 5\n");
3120 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3121 /*
3122 * We can have "ns" or "prefix" attributes
3123 * Old encoding as 'href' or 'AS' attributes is still supported
3124 */
3125 if ((RAW == 'n') && (NXT(1) == 's')) {
3126 garbage = 0;
3127 SKIP(2);
3128 SKIP_BLANKS;
3129
3130 if (RAW != '=') continue;
3131 NEXT;
3132 SKIP_BLANKS;
3133
3134 href = xmlParseQuotedString(ctxt);
3135 SKIP_BLANKS;
3136 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3137 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3138 garbage = 0;
3139 SKIP(4);
3140 SKIP_BLANKS;
3141
3142 if (RAW != '=') continue;
3143 NEXT;
3144 SKIP_BLANKS;
3145
3146 href = xmlParseQuotedString(ctxt);
3147 SKIP_BLANKS;
3148 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3149 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3150 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3151 garbage = 0;
3152 SKIP(6);
3153 SKIP_BLANKS;
3154
3155 if (RAW != '=') continue;
3156 NEXT;
3157 SKIP_BLANKS;
3158
3159 prefix = xmlParseQuotedString(ctxt);
3160 SKIP_BLANKS;
3161 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3162 garbage = 0;
3163 SKIP(2);
3164 SKIP_BLANKS;
3165
3166 if (RAW != '=') continue;
3167 NEXT;
3168 SKIP_BLANKS;
3169
3170 prefix = xmlParseQuotedString(ctxt);
3171 SKIP_BLANKS;
3172 } else if ((RAW == '?') && (NXT(1) == '>')) {
3173 garbage = 0;
3174 NEXT;
3175 } else {
3176 /*
3177 * Found garbage when parsing the namespace
3178 */
3179 if (!garbage) {
3180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3181 ctxt->sax->error(ctxt->userData,
3182 "xmlParseNamespace found garbage\n");
3183 }
3184 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3185 ctxt->wellFormed = 0;
3186 ctxt->disableSAX = 1;
3187 NEXT;
3188 }
3189 }
3190
3191 MOVETO_ENDTAG(CUR_PTR);
3192 NEXT;
3193
3194 /*
3195 * Register the DTD.
3196 if (href != NULL)
3197 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3198 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3199 */
3200
3201 if (prefix != NULL) xmlFree(prefix);
3202 if (href != NULL) xmlFree(href);
3203#endif
3204}
3205
3206/**
3207 * xmlScanName:
3208 * @ctxt: an XML parser context
3209 *
3210 * Trickery: parse an XML name but without consuming the input flow
3211 * Needed for rollback cases. Used only when parsing entities references.
3212 *
3213 * TODO: seems deprecated now, only used in the default part of
3214 * xmlParserHandleReference
3215 *
3216 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3217 * CombiningChar | Extender
3218 *
3219 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3220 *
3221 * [6] Names ::= Name (S Name)*
3222 *
3223 * Returns the Name parsed or NULL
3224 */
3225
3226xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003227xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003228 static int deprecated = 0;
3229 if (!deprecated) {
3230 xmlGenericError(xmlGenericErrorContext,
3231 "xmlScanName() deprecated function reached\n");
3232 deprecated = 1;
3233 }
3234 return(NULL);
3235
3236#if 0
3237 xmlChar buf[XML_MAX_NAMELEN];
3238 int len = 0;
3239
3240 GROW;
3241 if (!IS_LETTER(RAW) && (RAW != '_') &&
3242 (RAW != ':')) {
3243 return(NULL);
3244 }
3245
3246
3247 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3248 (NXT(len) == '.') || (NXT(len) == '-') ||
3249 (NXT(len) == '_') || (NXT(len) == ':') ||
3250 (IS_COMBINING(NXT(len))) ||
3251 (IS_EXTENDER(NXT(len)))) {
3252 GROW;
3253 buf[len] = NXT(len);
3254 len++;
3255 if (len >= XML_MAX_NAMELEN) {
3256 xmlGenericError(xmlGenericErrorContext,
3257 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3258 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3259 (IS_DIGIT(NXT(len))) ||
3260 (NXT(len) == '.') || (NXT(len) == '-') ||
3261 (NXT(len) == '_') || (NXT(len) == ':') ||
3262 (IS_COMBINING(NXT(len))) ||
3263 (IS_EXTENDER(NXT(len))))
3264 len++;
3265 break;
3266 }
3267 }
3268 return(xmlStrndup(buf, len));
3269#endif
3270}
3271
3272/**
3273 * xmlParserHandleReference:
3274 * @ctxt: the parser context
3275 *
3276 * TODO: Remove, now deprecated ... the test is done directly in the
3277 * content parsing
3278 * routines.
3279 *
3280 * [67] Reference ::= EntityRef | CharRef
3281 *
3282 * [68] EntityRef ::= '&' Name ';'
3283 *
3284 * [ WFC: Entity Declared ]
3285 * the Name given in the entity reference must match that in an entity
3286 * declaration, except that well-formed documents need not declare any
3287 * of the following entities: amp, lt, gt, apos, quot.
3288 *
3289 * [ WFC: Parsed Entity ]
3290 * An entity reference must not contain the name of an unparsed entity
3291 *
3292 * [66] CharRef ::= '&#' [0-9]+ ';' |
3293 * '&#x' [0-9a-fA-F]+ ';'
3294 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003295 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003296 * the handling is done accordingly to
3297 * http://www.w3.org/TR/REC-xml#entproc
3298 */
3299void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003300xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003301 static int deprecated = 0;
3302 if (!deprecated) {
3303 xmlGenericError(xmlGenericErrorContext,
3304 "xmlParserHandleReference() deprecated function reached\n");
3305 deprecated = 1;
3306 }
3307
3308#if 0
3309 xmlParserInputPtr input;
3310 xmlChar *name;
3311 xmlEntityPtr ent = NULL;
3312
3313 if (ctxt->token != 0) {
3314 return;
3315 }
3316 if (RAW != '&') return;
3317 GROW;
3318 if ((RAW == '&') && (NXT(1) == '#')) {
3319 switch(ctxt->instate) {
3320 case XML_PARSER_ENTITY_DECL:
3321 case XML_PARSER_PI:
3322 case XML_PARSER_CDATA_SECTION:
3323 case XML_PARSER_COMMENT:
3324 case XML_PARSER_SYSTEM_LITERAL:
3325 /* we just ignore it there */
3326 return;
3327 case XML_PARSER_START_TAG:
3328 return;
3329 case XML_PARSER_END_TAG:
3330 return;
3331 case XML_PARSER_EOF:
3332 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3334 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3335 ctxt->wellFormed = 0;
3336 ctxt->disableSAX = 1;
3337 return;
3338 case XML_PARSER_PROLOG:
3339 case XML_PARSER_START:
3340 case XML_PARSER_MISC:
3341 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3344 ctxt->wellFormed = 0;
3345 ctxt->disableSAX = 1;
3346 return;
3347 case XML_PARSER_EPILOG:
3348 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3351 ctxt->wellFormed = 0;
3352 ctxt->disableSAX = 1;
3353 return;
3354 case XML_PARSER_DTD:
3355 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003358 "CharRef are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 return;
3362 case XML_PARSER_ENTITY_VALUE:
3363 /*
3364 * NOTE: in the case of entity values, we don't do the
3365 * substitution here since we need the literal
3366 * entity value to be able to save the internal
3367 * subset of the document.
3368 * This will be handled by xmlStringDecodeEntities
3369 */
3370 return;
3371 case XML_PARSER_CONTENT:
3372 return;
3373 case XML_PARSER_ATTRIBUTE_VALUE:
3374 /* ctxt->token = xmlParseCharRef(ctxt); */
3375 return;
3376 case XML_PARSER_IGNORE:
3377 return;
3378 }
3379 return;
3380 }
3381
3382 switch(ctxt->instate) {
3383 case XML_PARSER_CDATA_SECTION:
3384 return;
3385 case XML_PARSER_PI:
3386 case XML_PARSER_COMMENT:
3387 case XML_PARSER_SYSTEM_LITERAL:
3388 case XML_PARSER_CONTENT:
3389 return;
3390 case XML_PARSER_START_TAG:
3391 return;
3392 case XML_PARSER_END_TAG:
3393 return;
3394 case XML_PARSER_EOF:
3395 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3398 ctxt->wellFormed = 0;
3399 ctxt->disableSAX = 1;
3400 return;
3401 case XML_PARSER_PROLOG:
3402 case XML_PARSER_START:
3403 case XML_PARSER_MISC:
3404 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3406 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3407 ctxt->wellFormed = 0;
3408 ctxt->disableSAX = 1;
3409 return;
3410 case XML_PARSER_EPILOG:
3411 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3413 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3414 ctxt->wellFormed = 0;
3415 ctxt->disableSAX = 1;
3416 return;
3417 case XML_PARSER_ENTITY_VALUE:
3418 /*
3419 * NOTE: in the case of entity values, we don't do the
3420 * substitution here since we need the literal
3421 * entity value to be able to save the internal
3422 * subset of the document.
3423 * This will be handled by xmlStringDecodeEntities
3424 */
3425 return;
3426 case XML_PARSER_ATTRIBUTE_VALUE:
3427 /*
3428 * NOTE: in the case of attributes values, we don't do the
3429 * substitution here unless we are in a mode where
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003430 * the parser is explicitly asked to substitute
Owen Taylor3473f882001-02-23 17:55:21 +00003431 * entities. The SAX callback is called with values
3432 * without entity substitution.
3433 * This will then be handled by xmlStringDecodeEntities
3434 */
3435 return;
3436 case XML_PARSER_ENTITY_DECL:
3437 /*
3438 * we just ignore it there
3439 * the substitution will be done once the entity is referenced
3440 */
3441 return;
3442 case XML_PARSER_DTD:
3443 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3445 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003446 "Entity references are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003447 ctxt->wellFormed = 0;
3448 ctxt->disableSAX = 1;
3449 return;
3450 case XML_PARSER_IGNORE:
3451 return;
3452 }
3453
3454/* TODO: this seems not reached anymore .... Verify ... */
3455xmlGenericError(xmlGenericErrorContext,
3456 "Reached deprecated section in xmlParserHandleReference()\n");
3457xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003458 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003459xmlGenericError(xmlGenericErrorContext,
3460 "indicating the version: %s, thanks !\n", xmlParserVersion);
3461 NEXT;
3462 name = xmlScanName(ctxt);
3463 if (name == NULL) {
3464 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3466 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3467 ctxt->wellFormed = 0;
3468 ctxt->disableSAX = 1;
3469 ctxt->token = '&';
3470 return;
3471 }
3472 if (NXT(xmlStrlen(name)) != ';') {
3473 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3475 ctxt->sax->error(ctxt->userData,
3476 "Entity reference: ';' expected\n");
3477 ctxt->wellFormed = 0;
3478 ctxt->disableSAX = 1;
3479 ctxt->token = '&';
3480 xmlFree(name);
3481 return;
3482 }
3483 SKIP(xmlStrlen(name) + 1);
3484 if (ctxt->sax != NULL) {
3485 if (ctxt->sax->getEntity != NULL)
3486 ent = ctxt->sax->getEntity(ctxt->userData, name);
3487 }
3488
3489 /*
3490 * [ WFC: Entity Declared ]
3491 * the Name given in the entity reference must match that in an entity
3492 * declaration, except that well-formed documents need not declare any
3493 * of the following entities: amp, lt, gt, apos, quot.
3494 */
3495 if (ent == NULL)
3496 ent = xmlGetPredefinedEntity(name);
3497 if (ent == NULL) {
3498 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "Entity reference: entity %s not declared\n",
3502 name);
3503 ctxt->wellFormed = 0;
3504 ctxt->disableSAX = 1;
3505 xmlFree(name);
3506 return;
3507 }
3508
3509 /*
3510 * [ WFC: Parsed Entity ]
3511 * An entity reference must not contain the name of an unparsed entity
3512 */
3513 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3514 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3516 ctxt->sax->error(ctxt->userData,
3517 "Entity reference to unparsed entity %s\n", name);
3518 ctxt->wellFormed = 0;
3519 ctxt->disableSAX = 1;
3520 }
3521
3522 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3523 ctxt->token = ent->content[0];
3524 xmlFree(name);
3525 return;
3526 }
3527 input = xmlNewEntityInputStream(ctxt, ent);
3528 xmlPushInput(ctxt, input);
3529 xmlFree(name);
3530#endif
3531 return;
3532}
3533
3534/**
3535 * xmlHandleEntity:
3536 * @ctxt: an XML parser context
3537 * @entity: an XML entity pointer.
3538 *
3539 * Default handling of defined entities, when should we define a new input
3540 * stream ? When do we just handle that as a set of chars ?
3541 *
3542 * OBSOLETE: to be removed at some point.
3543 */
3544
3545void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003546xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003547 static int deprecated = 0;
3548 if (!deprecated) {
3549 xmlGenericError(xmlGenericErrorContext,
3550 "xmlHandleEntity() deprecated function reached\n");
3551 deprecated = 1;
3552 }
3553
3554#if 0
3555 int len;
3556 xmlParserInputPtr input;
3557
3558 if (entity->content == NULL) {
3559 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3561 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3562 entity->name);
3563 ctxt->wellFormed = 0;
3564 ctxt->disableSAX = 1;
3565 return;
3566 }
3567 len = xmlStrlen(entity->content);
3568 if (len <= 2) goto handle_as_char;
3569
3570 /*
3571 * Redefine its content as an input stream.
3572 */
3573 input = xmlNewEntityInputStream(ctxt, entity);
3574 xmlPushInput(ctxt, input);
3575 return;
3576
3577handle_as_char:
3578 /*
3579 * Just handle the content as a set of chars.
3580 */
3581 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3582 (ctxt->sax->characters != NULL))
3583 ctxt->sax->characters(ctxt->userData, entity->content, len);
3584#endif
3585}
3586
3587/**
3588 * xmlNewGlobalNs:
3589 * @doc: the document carrying the namespace
3590 * @href: the URI associated
3591 * @prefix: the prefix for the namespace
3592 *
3593 * Creation of a Namespace, the old way using PI and without scoping
3594 * DEPRECATED !!!
3595 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003596 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003597 */
3598xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003599xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3600 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003601 static int deprecated = 0;
3602 if (!deprecated) {
3603 xmlGenericError(xmlGenericErrorContext,
3604 "xmlNewGlobalNs() deprecated function reached\n");
3605 deprecated = 1;
3606 }
3607 return(NULL);
3608#if 0
3609 xmlNodePtr root;
3610
3611 xmlNsPtr cur;
3612
3613 root = xmlDocGetRootElement(doc);
3614 if (root != NULL)
3615 return(xmlNewNs(root, href, prefix));
3616
3617 /*
3618 * if there is no root element yet, create an old Namespace type
3619 * and it will be moved to the root at save time.
3620 */
3621 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3622 if (cur == NULL) {
3623 xmlGenericError(xmlGenericErrorContext,
3624 "xmlNewGlobalNs : malloc failed\n");
3625 return(NULL);
3626 }
3627 memset(cur, 0, sizeof(xmlNs));
3628 cur->type = XML_GLOBAL_NAMESPACE;
3629
3630 if (href != NULL)
3631 cur->href = xmlStrdup(href);
3632 if (prefix != NULL)
3633 cur->prefix = xmlStrdup(prefix);
3634
3635 /*
3636 * Add it at the end to preserve parsing order ...
3637 */
3638 if (doc != NULL) {
3639 if (doc->oldNs == NULL) {
3640 doc->oldNs = cur;
3641 } else {
3642 xmlNsPtr prev = doc->oldNs;
3643
3644 while (prev->next != NULL) prev = prev->next;
3645 prev->next = cur;
3646 }
3647 }
3648
3649 return(NULL);
3650#endif
3651}
3652
3653/**
3654 * xmlUpgradeOldNs:
3655 * @doc: a document pointer
3656 *
3657 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3658 * DEPRECATED
3659 */
3660void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003661xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003662 static int deprecated = 0;
3663 if (!deprecated) {
3664 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003665 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003666 deprecated = 1;
3667 }
3668#if 0
3669 xmlNsPtr cur;
3670
3671 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3672 if (doc->children == NULL) {
3673#ifdef DEBUG_TREE
3674 xmlGenericError(xmlGenericErrorContext,
3675 "xmlUpgradeOldNs: failed no root !\n");
3676#endif
3677 return;
3678 }
3679
3680 cur = doc->oldNs;
3681 while (cur->next != NULL) {
3682 cur->type = XML_LOCAL_NAMESPACE;
3683 cur = cur->next;
3684 }
3685 cur->type = XML_LOCAL_NAMESPACE;
3686 cur->next = doc->children->nsDef;
3687 doc->children->nsDef = doc->oldNs;
3688 doc->oldNs = NULL;
3689#endif
3690}
3691