blob: 68ac53825fc567d03cfc2d5eb39a972649178fe3 [file] [log] [blame]
/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
/*
 * Parser-wide default settings.
 *
 * On VMS the two default flags are defined in this file under shortened
 * identifiers; the regular long names are mapped onto the short ones
 * through macros, so the definitions below expand to the short names.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
Owen Taylor3473f882001-02-23 17:55:21 +000067
Daniel Veillard5e2dace2001-07-18 19:30:27 +000068/**
Owen Taylor3473f882001-02-23 17:55:21 +000069 * xmlCheckVersion:
70 * @version: the include version number
71 *
72 * check the compiled lib version against the include one.
73 * This can warn or immediately kill the application
74 */
75void
76xmlCheckVersion(int version) {
77 int myversion = (int) LIBXML_VERSION;
78
Daniel Veillard6f350292001-10-14 09:56:15 +000079 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000080
Owen Taylor3473f882001-02-23 17:55:21 +000081 if ((myversion / 10000) != (version / 10000)) {
82 xmlGenericError(xmlGenericErrorContext,
83 "Fatal: program compiled against libxml %d using libxml %d\n",
84 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000085 fprintf(stderr,
86 "Fatal: program compiled against libxml %d using libxml %d\n",
87 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000088 }
89 if ((myversion / 100) < (version / 100)) {
90 xmlGenericError(xmlGenericErrorContext,
91 "Warning: program compiled against libxml %d using older %d\n",
92 (version / 100), (myversion / 100));
93 }
94}
95
96
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * The copied pointers refer to static strings and must not be freed.
 *
 * Returns the total number of known features; this is also what is
 *         returned, without copying anything, when @len or @result is
 *         NULL. Returns -1 if *@len is negative or not below 1000.
 *         On success *@len is lowered to the number of names copied
 *         when it was larger than the total.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if (len == NULL)
        return(total);
    if (result == NULL)
        return(total);

    wanted = *len;
    if (wanted < 0)
        return(-1);
    if (wanted >= 1000)
        return(-1);

    /* never hand out more names than the table holds */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    idx = 0;
    while (idx < wanted) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
168
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000169/**
Owen Taylor3473f882001-02-23 17:55:21 +0000170 * xmlGetFeature:
171 * @ctxt: an XML/HTML parser context
172 * @name: the feature name
173 * @result: location to store the result
174 *
175 * Read the current value of one feature of this parser instance
176 *
177 * Returns -1 in case or error, 0 otherwise
178 */
179int
180xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
181 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
182 return(-1);
183
184 if (!strcmp(name, "validate")) {
185 *((int *) result) = ctxt->validate;
186 } else if (!strcmp(name, "keep blanks")) {
187 *((int *) result) = ctxt->keepBlanks;
188 } else if (!strcmp(name, "disable SAX")) {
189 *((int *) result) = ctxt->disableSAX;
190 } else if (!strcmp(name, "fetch external entities")) {
191 *((int *) result) = ctxt->loadsubset;
192 } else if (!strcmp(name, "substitute entities")) {
193 *((int *) result) = ctxt->replaceEntities;
194 } else if (!strcmp(name, "gather line info")) {
195 *((int *) result) = ctxt->record_info;
196 } else if (!strcmp(name, "user data")) {
197 *((void **)result) = ctxt->userData;
198 } else if (!strcmp(name, "is html")) {
199 *((int *) result) = ctxt->html;
200 } else if (!strcmp(name, "is standalone")) {
201 *((int *) result) = ctxt->standalone;
202 } else if (!strcmp(name, "document")) {
203 *((xmlDocPtr *) result) = ctxt->myDoc;
204 } else if (!strcmp(name, "is well formed")) {
205 *((int *) result) = ctxt->wellFormed;
206 } else if (!strcmp(name, "is valid")) {
207 *((int *) result) = ctxt->valid;
208 } else if (!strcmp(name, "SAX block")) {
209 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
210 } else if (!strcmp(name, "SAX function internalSubset")) {
211 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
212 } else if (!strcmp(name, "SAX function isStandalone")) {
213 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
214 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
215 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
216 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
217 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
218 } else if (!strcmp(name, "SAX function resolveEntity")) {
219 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
220 } else if (!strcmp(name, "SAX function getEntity")) {
221 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
222 } else if (!strcmp(name, "SAX function entityDecl")) {
223 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
224 } else if (!strcmp(name, "SAX function notationDecl")) {
225 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
226 } else if (!strcmp(name, "SAX function attributeDecl")) {
227 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
228 } else if (!strcmp(name, "SAX function elementDecl")) {
229 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
230 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
231 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
232 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
233 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
234 } else if (!strcmp(name, "SAX function startDocument")) {
235 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
236 } else if (!strcmp(name, "SAX function endDocument")) {
237 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
238 } else if (!strcmp(name, "SAX function startElement")) {
239 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
240 } else if (!strcmp(name, "SAX function endElement")) {
241 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
242 } else if (!strcmp(name, "SAX function reference")) {
243 *((referenceSAXFunc *) result) = ctxt->sax->reference;
244 } else if (!strcmp(name, "SAX function characters")) {
245 *((charactersSAXFunc *) result) = ctxt->sax->characters;
246 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
247 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
248 } else if (!strcmp(name, "SAX function processingInstruction")) {
249 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
250 } else if (!strcmp(name, "SAX function comment")) {
251 *((commentSAXFunc *) result) = ctxt->sax->comment;
252 } else if (!strcmp(name, "SAX function warning")) {
253 *((warningSAXFunc *) result) = ctxt->sax->warning;
254 } else if (!strcmp(name, "SAX function error")) {
255 *((errorSAXFunc *) result) = ctxt->sax->error;
256 } else if (!strcmp(name, "SAX function fatalError")) {
257 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
258 } else if (!strcmp(name, "SAX function getParameterEntity")) {
259 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
260 } else if (!strcmp(name, "SAX function cdataBlock")) {
261 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
262 } else if (!strcmp(name, "SAX function externalSubset")) {
263 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
264 } else {
265 return(-1);
266 }
267 return(0);
268}
269
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000270/**
Owen Taylor3473f882001-02-23 17:55:21 +0000271 * xmlSetFeature:
272 * @ctxt: an XML/HTML parser context
273 * @name: the feature name
274 * @value: pointer to the location of the new value
275 *
276 * Change the current value of one feature of this parser instance
277 *
278 * Returns -1 in case or error, 0 otherwise
279 */
280int
281xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
282 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
283 return(-1);
284
285 if (!strcmp(name, "validate")) {
286 int newvalidate = *((int *) value);
287 if ((!ctxt->validate) && (newvalidate != 0)) {
288 if (ctxt->vctxt.warning == NULL)
289 ctxt->vctxt.warning = xmlParserValidityWarning;
290 if (ctxt->vctxt.error == NULL)
291 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000292 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000293 }
294 ctxt->validate = newvalidate;
295 } else if (!strcmp(name, "keep blanks")) {
296 ctxt->keepBlanks = *((int *) value);
297 } else if (!strcmp(name, "disable SAX")) {
298 ctxt->disableSAX = *((int *) value);
299 } else if (!strcmp(name, "fetch external entities")) {
300 ctxt->loadsubset = *((int *) value);
301 } else if (!strcmp(name, "substitute entities")) {
302 ctxt->replaceEntities = *((int *) value);
303 } else if (!strcmp(name, "gather line info")) {
304 ctxt->record_info = *((int *) value);
305 } else if (!strcmp(name, "user data")) {
306 ctxt->userData = *((void **)value);
307 } else if (!strcmp(name, "is html")) {
308 ctxt->html = *((int *) value);
309 } else if (!strcmp(name, "is standalone")) {
310 ctxt->standalone = *((int *) value);
311 } else if (!strcmp(name, "document")) {
312 ctxt->myDoc = *((xmlDocPtr *) value);
313 } else if (!strcmp(name, "is well formed")) {
314 ctxt->wellFormed = *((int *) value);
315 } else if (!strcmp(name, "is valid")) {
316 ctxt->valid = *((int *) value);
317 } else if (!strcmp(name, "SAX block")) {
318 ctxt->sax = *((xmlSAXHandlerPtr *) value);
319 } else if (!strcmp(name, "SAX function internalSubset")) {
320 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function isStandalone")) {
322 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
324 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
325 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
326 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function resolveEntity")) {
328 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
329 } else if (!strcmp(name, "SAX function getEntity")) {
330 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
331 } else if (!strcmp(name, "SAX function entityDecl")) {
332 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function notationDecl")) {
334 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function attributeDecl")) {
336 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function elementDecl")) {
338 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
340 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
342 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function startDocument")) {
344 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function endDocument")) {
346 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function startElement")) {
348 ctxt->sax->startElement = *((startElementSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function endElement")) {
350 ctxt->sax->endElement = *((endElementSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function reference")) {
352 ctxt->sax->reference = *((referenceSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function characters")) {
354 ctxt->sax->characters = *((charactersSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
356 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function processingInstruction")) {
358 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function comment")) {
360 ctxt->sax->comment = *((commentSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function warning")) {
362 ctxt->sax->warning = *((warningSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function error")) {
364 ctxt->sax->error = *((errorSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function fatalError")) {
366 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function getParameterEntity")) {
368 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
369 } else if (!strcmp(name, "SAX function cdataBlock")) {
370 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
371 } else if (!strcmp(name, "SAX function externalSubset")) {
372 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
373 } else {
374 return(-1);
375 }
376 return(0);
377}
378
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
384
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through and fail the range test below */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
405
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
420
/*
 * Direct lookup table for the code points 0x0000 - 0x00FF: a non-zero
 * entry marks a BaseChar.
 */
static const int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [first, last] BaseChar ranges for the code points from
 * 0x0100 upward. The entries are sorted in ascending order and do not
 * overlap, which xmlIsBaseChar() relies on for its binary search.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3},
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from a direct lookup table,
 * everything else by a binary search over the sorted range table.
 * Negative values are rejected instead of being used as a table index.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    /* a negative value is not a character and must not index the table */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
659
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The ASCII digits are tested first; the remaining digit runs are kept
 * in a small table sorted in ascending order.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] digit runs above the ASCII range */
    static const int digitRuns[][2] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
        {0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF},
        {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
        {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29},
    };
    const int runCount = (int) (sizeof(digitRuns) / sizeof(digitRuns[0]));
    int idx;

    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    if (c < 0x0660)
        return(0);

    for (idx = 0; idx < runCount; idx++) {
        /* the runs are sorted: once c is below a start it cannot match */
        if (c < digitRuns[idx][0])
            break;
        if (c <= digitRuns[idx][1])
            return(1);
    }
    return(0);
}
689
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges are kept in a table sorted in ascending order, mirroring
 * the order of the production, and searched by bisection.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive [first, last] ranges, sorted and non-overlapping */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C},
        {0x093E, 0x094C}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4},
        {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
        {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C},
        {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5},
        {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
        {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
        {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03},
        {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},
        {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
        {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
        {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97},
        {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F},
        {0x3099, 0x3099}, {0x309A, 0x309A},
    };
    int low = 0;
    int high = (int) (sizeof(combiningRanges) /
                      sizeof(combiningRanges[0])) - 1;

    /* nothing below the first range can match */
    if (c < 0x0300)
        return(0);

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < combiningRanges[mid][0])
            high = mid - 1;
        else if (c > combiningRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
802
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return 1;
    if ((c >= 0x309D) && (c <= 0x309E))
        return 1;
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return 1;

    /* the isolated code points */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) || (c == 0x0387) ||
        (c == 0x0640) || (c == 0x0E46) || (c == 0x0EC6) || (c == 0x3005))
        return 1;

    return 0;
}
827
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * range #xF900-#xFA2D is not part of it and is therefore rejected.
 *
 * Returns 1 if @c is an Ideographic, 0 otherwise
 */
int
xmlIsIdeographic(int c) {
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
845
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first, Ideographic only when that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
859
/**
 * xmlIsPubidChar:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* the three allowed blank characters */
        case 0x20: case 0x0D: case 0x0A:
        /* the allowed punctuation */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
882
883/************************************************************************
884 * *
885 * Input handling functions for progressive parsing *
886 * *
887 ************************************************************************/
888
889/* #define DEBUG_INPUT */
890/* #define DEBUG_STACK */
891/* #define DEBUG_PUSH */
892
893
894/* we need to keep enough input to show errors in context */
895#define LINE_LEN 80
896
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined. It checks
 * that the base and cur pointers of @in are consistent with the content
 * of the underlying buffer, reports any mismatch through xmlGenericError
 * and then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the cursor offset is widened to
     * long: casting a pointer to int truncates it where pointers are
     * wider than int, and a pointer difference is not an int either.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
/* consistency checks are compiled out unless DEBUG_INPUT is defined */
#define CHECK_BUFFER(in)
#endif
921
922
923/**
924 * xmlParserInputRead:
925 * @in: an XML parser input
926 * @len: an indicative size for the lookahead
927 *
928 * This function refresh the input for the parser. It doesn't try to
929 * preserve pointers to the input buffer, and discard already read data
930 *
931 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
932 * end of this entity
933 */
934int
935xmlParserInputRead(xmlParserInputPtr in, int len) {
936 int ret;
937 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000938 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000939
940#ifdef DEBUG_INPUT
941 xmlGenericError(xmlGenericErrorContext, "Read\n");
942#endif
943 if (in->buf == NULL) return(-1);
944 if (in->base == NULL) return(-1);
945 if (in->cur == NULL) return(-1);
946 if (in->buf->buffer == NULL) return(-1);
947 if (in->buf->readcallback == NULL) return(-1);
948
949 CHECK_BUFFER(in);
950
951 used = in->cur - in->buf->buffer->content;
952 ret = xmlBufferShrink(in->buf->buffer, used);
953 if (ret > 0) {
954 in->cur -= ret;
955 in->consumed += ret;
956 }
957 ret = xmlParserInputBufferRead(in->buf, len);
958 if (in->base != in->buf->buffer->content) {
959 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000960 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000961 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000962 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000963 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000964 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000965 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000966 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000967
968 CHECK_BUFFER(in);
969
970 return(ret);
971}
972
973/**
974 * xmlParserInputGrow:
975 * @in: an XML parser input
976 * @len: an indicative size for the lookahead
977 *
978 * This function increase the input for the parser. It tries to
979 * preserve pointers to the input buffer, and keep already read data
980 *
981 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
982 * end of this entity
983 */
984int
985xmlParserInputGrow(xmlParserInputPtr in, int len) {
986 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000987 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000988
989#ifdef DEBUG_INPUT
990 xmlGenericError(xmlGenericErrorContext, "Grow\n");
991#endif
992 if (in->buf == NULL) return(-1);
993 if (in->base == NULL) return(-1);
994 if (in->cur == NULL) return(-1);
995 if (in->buf->buffer == NULL) return(-1);
996
997 CHECK_BUFFER(in);
998
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000999 indx = in->cur - in->base;
1000 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001001
1002 CHECK_BUFFER(in);
1003
1004 return(0);
1005 }
1006 if (in->buf->readcallback != NULL)
1007 ret = xmlParserInputBufferGrow(in->buf, len);
1008 else
1009 return(0);
1010
1011 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001012 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001013 * block, but we use it really as an integer to do some
1014 * pointer arithmetic. Insure will raise it as a bug but in
1015 * that specific case, that's not !
1016 */
1017 if (in->base != in->buf->buffer->content) {
1018 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001019 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001020 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001021 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001022 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001023 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001024 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001025 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001026
1027 CHECK_BUFFER(in);
1028
1029 return(ret);
1030}
1031
1032/**
1033 * xmlParserInputShrink:
1034 * @in: an XML parser input
1035 *
1036 * This function removes used input for the parser.
1037 */
1038void
1039xmlParserInputShrink(xmlParserInputPtr in) {
1040 int used;
1041 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001042 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001043
1044#ifdef DEBUG_INPUT
1045 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1046#endif
1047 if (in->buf == NULL) return;
1048 if (in->base == NULL) return;
1049 if (in->cur == NULL) return;
1050 if (in->buf->buffer == NULL) return;
1051
1052 CHECK_BUFFER(in);
1053
1054 used = in->cur - in->buf->buffer->content;
1055 /*
1056 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001057 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001058 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001059 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001060 return;
1061 if (used > INPUT_CHUNK) {
1062 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1063 if (ret > 0) {
1064 in->cur -= ret;
1065 in->consumed += ret;
1066 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001067 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001068 }
1069
1070 CHECK_BUFFER(in);
1071
1072 if (in->buf->buffer->use > INPUT_CHUNK) {
1073 return;
1074 }
1075 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1076 if (in->base != in->buf->buffer->content) {
1077 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001078 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001079 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001080 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001081 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001082 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001083 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001084 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001085
1086 CHECK_BUFFER(in);
1087}
1088
1089/************************************************************************
1090 * *
1091 * UTF8 character input and related functions *
1092 * *
1093 ************************************************************************/
1094
1095/**
1096 * xmlNextChar:
1097 * @ctxt: the XML parser context
1098 *
1099 * Skip to the next char input char.
1100 */
1101
1102void
1103xmlNextChar(xmlParserCtxtPtr ctxt) {
1104 if (ctxt->instate == XML_PARSER_EOF)
1105 return;
1106
1107 /*
1108 * 2.11 End-of-Line Handling
1109 * the literal two-character sequence "#xD#xA" or a standalone
1110 * literal #xD, an XML processor must pass to the application
1111 * the single character #xA.
1112 */
1113 if (ctxt->token != 0) ctxt->token = 0;
1114 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001115 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001116 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1117 (ctxt->instate != XML_PARSER_COMMENT)) {
1118 /*
1119 * If we are at the end of the current entity and
1120 * the context allows it, we pop consumed entities
1121 * automatically.
1122 * the auto closing should be blocked in other cases
1123 */
1124 xmlPopInput(ctxt);
1125 } else {
1126 if (*(ctxt->input->cur) == '\n') {
1127 ctxt->input->line++; ctxt->input->col = 1;
1128 } else ctxt->input->col++;
1129 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1130 /*
1131 * We are supposed to handle UTF8, check it's valid
1132 * From rfc2044: encoding of the Unicode values on UTF-8:
1133 *
1134 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1135 * 0000 0000-0000 007F 0xxxxxxx
1136 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1137 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1138 *
1139 * Check for the 0x110000 limit too
1140 */
1141 const unsigned char *cur = ctxt->input->cur;
1142 unsigned char c;
1143
1144 c = *cur;
1145 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001146 if (cur[1] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001149 goto encoding_error;
1150 if ((c & 0xe0) == 0xe0) {
1151 unsigned int val;
1152
Daniel Veillard561b7f82002-03-20 21:55:57 +00001153 if (cur[2] == 0)
1154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1155 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001156 goto encoding_error;
1157 if ((c & 0xf0) == 0xf0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001158 if (cur[3] == 0)
1159 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1160 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001161 ((cur[3] & 0xc0) != 0x80))
1162 goto encoding_error;
1163 /* 4-byte code */
1164 ctxt->input->cur += 4;
1165 val = (cur[0] & 0x7) << 18;
1166 val |= (cur[1] & 0x3f) << 12;
1167 val |= (cur[2] & 0x3f) << 6;
1168 val |= cur[3] & 0x3f;
1169 } else {
1170 /* 3-byte code */
1171 ctxt->input->cur += 3;
1172 val = (cur[0] & 0xf) << 12;
1173 val |= (cur[1] & 0x3f) << 6;
1174 val |= cur[2] & 0x3f;
1175 }
1176 if (((val > 0xd7ff) && (val < 0xe000)) ||
1177 ((val > 0xfffd) && (val < 0x10000)) ||
1178 (val >= 0x110000)) {
1179 if ((ctxt->sax != NULL) &&
1180 (ctxt->sax->error != NULL))
1181 ctxt->sax->error(ctxt->userData,
1182 "Char 0x%X out of allowed range\n", val);
1183 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1184 ctxt->wellFormed = 0;
1185 ctxt->disableSAX = 1;
1186 }
1187 } else
1188 /* 2-byte code */
1189 ctxt->input->cur += 2;
1190 } else
1191 /* 1-byte code */
1192 ctxt->input->cur++;
1193 } else {
1194 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001195 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001196 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001197 * XML constructs only use < 128 chars
1198 */
1199 ctxt->input->cur++;
1200 }
1201 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001202 if (*ctxt->input->cur == 0)
1203 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001204 }
1205 } else {
1206 ctxt->input->cur++;
1207 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001208 if (*ctxt->input->cur == 0)
Owen Taylor3473f882001-02-23 17:55:21 +00001209 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1210 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001211 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Owen Taylor3473f882001-02-23 17:55:21 +00001212 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001213 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001214 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1215 xmlPopInput(ctxt);
1216 return;
1217encoding_error:
1218 /*
1219 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001220 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001221 * declaration header. Report the error and switch the encoding
1222 * to ISO-Latin-1 (if you don't like this policy, just declare the
1223 * encoding !)
1224 */
1225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1226 ctxt->sax->error(ctxt->userData,
1227 "Input is not proper UTF-8, indicate encoding !\n");
1228 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001229 ctxt->input->cur[0], ctxt->input->cur[1],
1230 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001231 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001232 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001233 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1234
1235 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001236 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001237 return;
1238}
1239
1240/**
1241 * xmlCurrentChar:
1242 * @ctxt: the XML parser context
1243 * @len: pointer to the length of the char read
1244 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001245 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001246 * bytes in the input buffer. Implement the end of line normalization:
1247 * 2.11 End-of-Line Handling
1248 * Wherever an external parsed entity or the literal entity value
1249 * of an internal parsed entity contains either the literal two-character
1250 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1251 * must pass to the application the single character #xA.
1252 * This behavior can conveniently be produced by normalizing all
1253 * line breaks to #xA on input, before parsing.)
1254 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001255 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001256 */
1257
1258int
1259xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1260 if (ctxt->instate == XML_PARSER_EOF)
1261 return(0);
1262
1263 if (ctxt->token != 0) {
1264 *len = 0;
1265 return(ctxt->token);
1266 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001267 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1268 *len = 1;
1269 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001270 }
1271 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1272 /*
1273 * We are supposed to handle UTF8, check it's valid
1274 * From rfc2044: encoding of the Unicode values on UTF-8:
1275 *
1276 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1277 * 0000 0000-0000 007F 0xxxxxxx
1278 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1279 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1280 *
1281 * Check for the 0x110000 limit too
1282 */
1283 const unsigned char *cur = ctxt->input->cur;
1284 unsigned char c;
1285 unsigned int val;
1286
1287 c = *cur;
1288 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001289 if (cur[1] == 0)
1290 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1291 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001292 goto encoding_error;
1293 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001294
1295 if (cur[2] == 0)
1296 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1297 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001298 goto encoding_error;
1299 if ((c & 0xf0) == 0xf0) {
1300 if (cur[3] == 0)
1301 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001302 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001303 ((cur[3] & 0xc0) != 0x80))
1304 goto encoding_error;
1305 /* 4-byte code */
1306 *len = 4;
1307 val = (cur[0] & 0x7) << 18;
1308 val |= (cur[1] & 0x3f) << 12;
1309 val |= (cur[2] & 0x3f) << 6;
1310 val |= cur[3] & 0x3f;
1311 } else {
1312 /* 3-byte code */
1313 *len = 3;
1314 val = (cur[0] & 0xf) << 12;
1315 val |= (cur[1] & 0x3f) << 6;
1316 val |= cur[2] & 0x3f;
1317 }
1318 } else {
1319 /* 2-byte code */
1320 *len = 2;
1321 val = (cur[0] & 0x1f) << 6;
1322 val |= cur[1] & 0x3f;
1323 }
1324 if (!IS_CHAR(val)) {
1325 if ((ctxt->sax != NULL) &&
1326 (ctxt->sax->error != NULL))
1327 ctxt->sax->error(ctxt->userData,
1328 "Char 0x%X out of allowed range\n", val);
1329 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1330 ctxt->wellFormed = 0;
1331 ctxt->disableSAX = 1;
1332 }
1333 return(val);
1334 } else {
1335 /* 1-byte code */
1336 *len = 1;
1337 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001338 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001339 ctxt->nbChars++;
1340 ctxt->input->cur++;
1341 }
1342 return(0xA);
1343 }
1344 return((int) *ctxt->input->cur);
1345 }
1346 }
1347 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001348 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001349 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001350 * XML constructs only use < 128 chars
1351 */
1352 *len = 1;
1353 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001354 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001355 ctxt->nbChars++;
1356 ctxt->input->cur++;
1357 }
1358 return(0xA);
1359 }
1360 return((int) *ctxt->input->cur);
1361encoding_error:
1362 /*
1363 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001364 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001365 * declaration header. Report the error and switch the encoding
1366 * to ISO-Latin-1 (if you don't like this policy, just declare the
1367 * encoding !)
1368 */
1369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1370 ctxt->sax->error(ctxt->userData,
1371 "Input is not proper UTF-8, indicate encoding !\n");
1372 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001373 ctxt->input->cur[0], ctxt->input->cur[1],
1374 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001375 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001376 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001377 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1378
1379 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1380 *len = 1;
1381 return((int) *ctxt->input->cur);
1382}
1383
1384/**
1385 * xmlStringCurrentChar:
1386 * @ctxt: the XML parser context
1387 * @cur: pointer to the beginning of the char
1388 * @len: pointer to the length of the char read
1389 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001390 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001391 * bytes in the input buffer.
1392 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001393 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001394 */
1395
1396int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001397xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1398{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001399 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001400 /*
1401 * We are supposed to handle UTF8, check it's valid
1402 * From rfc2044: encoding of the Unicode values on UTF-8:
1403 *
1404 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1405 * 0000 0000-0000 007F 0xxxxxxx
1406 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1407 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1408 *
1409 * Check for the 0x110000 limit too
1410 */
1411 unsigned char c;
1412 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001413
Daniel Veillardd8224e02002-01-13 15:43:22 +00001414 c = *cur;
1415 if (c & 0x80) {
1416 if ((cur[1] & 0xc0) != 0x80)
1417 goto encoding_error;
1418 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001419
Daniel Veillardd8224e02002-01-13 15:43:22 +00001420 if ((cur[2] & 0xc0) != 0x80)
1421 goto encoding_error;
1422 if ((c & 0xf0) == 0xf0) {
1423 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1424 goto encoding_error;
1425 /* 4-byte code */
1426 *len = 4;
1427 val = (cur[0] & 0x7) << 18;
1428 val |= (cur[1] & 0x3f) << 12;
1429 val |= (cur[2] & 0x3f) << 6;
1430 val |= cur[3] & 0x3f;
1431 } else {
1432 /* 3-byte code */
1433 *len = 3;
1434 val = (cur[0] & 0xf) << 12;
1435 val |= (cur[1] & 0x3f) << 6;
1436 val |= cur[2] & 0x3f;
1437 }
1438 } else {
1439 /* 2-byte code */
1440 *len = 2;
1441 val = (cur[0] & 0x1f) << 6;
1442 val |= cur[1] & 0x3f;
1443 }
1444 if (!IS_CHAR(val)) {
1445 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1446 (ctxt->sax->error != NULL))
1447 ctxt->sax->error(ctxt->userData,
1448 "Char 0x%X out of allowed range\n",
1449 val);
1450 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1451 ctxt->wellFormed = 0;
1452 ctxt->disableSAX = 1;
1453 }
1454 return (val);
1455 } else {
1456 /* 1-byte code */
1457 *len = 1;
1458 return ((int) *cur);
1459 }
Owen Taylor3473f882001-02-23 17:55:21 +00001460 }
1461 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001462 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001463 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001464 * XML constructs only use < 128 chars
1465 */
1466 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001467 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001468encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001469
Owen Taylor3473f882001-02-23 17:55:21 +00001470 /*
1471 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001472 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001473 * declaration header. Report the error and switch the encoding
1474 * to ISO-Latin-1 (if you don't like this policy, just declare the
1475 * encoding !)
1476 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001477 if (ctxt != NULL) {
1478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1479 ctxt->sax->error(ctxt->userData,
1480 "Input is not proper UTF-8, indicate encoding !\n");
1481 ctxt->sax->error(ctxt->userData,
1482 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1483 ctxt->input->cur[0], ctxt->input->cur[1],
1484 ctxt->input->cur[2], ctxt->input->cur[3]);
1485 }
1486 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001487 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001488 }
Owen Taylor3473f882001-02-23 17:55:21 +00001489
1490 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001491 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001492}
1493
1494/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001496 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001497 * @val: the char value
1498 *
1499 * append the char value in the array
1500 *
1501 * Returns the number of xmlChar written
1502 */
Owen Taylor3473f882001-02-23 17:55:21 +00001503int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001504xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001505 /*
1506 * We are supposed to handle UTF8, check it's valid
1507 * From rfc2044: encoding of the Unicode values on UTF-8:
1508 *
1509 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1510 * 0000 0000-0000 007F 0xxxxxxx
1511 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1512 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1513 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001514 if (val >= 0x80) {
1515 xmlChar *savedout = out;
1516 int bits;
1517 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1518 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1519 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1520 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001521 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001522 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001523 val);
1524 return(0);
1525 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001526 for ( ; bits >= 0; bits-= 6)
1527 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1528 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001529 }
1530 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001531 return 1;
1532}
1533
1534/**
1535 * xmlCopyChar:
1536 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001537 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538 * @val: the char value
1539 *
1540 * append the char value in the array
1541 *
1542 * Returns the number of xmlChar written
1543 */
1544
1545int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001546xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001547 /* the len parameter is ignored */
1548 if (val >= 0x80) {
1549 return(xmlCopyCharMultiByte (out, val));
1550 }
1551 *out = (xmlChar) val;
1552 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001553}
1554
1555/************************************************************************
1556 * *
1557 * Commodity functions to switch encodings *
1558 * *
1559 ************************************************************************/
1560
1561/**
1562 * xmlSwitchEncoding:
1563 * @ctxt: the parser context
1564 * @enc: the encoding value (number)
1565 *
1566 * change the input functions when discovering the character encoding
1567 * of a given entity.
1568 *
1569 * Returns 0 in case of success, -1 otherwise
1570 */
1571int
1572xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1573{
1574 xmlCharEncodingHandlerPtr handler;
1575
1576 switch (enc) {
1577 case XML_CHAR_ENCODING_ERROR:
1578 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1580 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1581 ctxt->wellFormed = 0;
1582 ctxt->disableSAX = 1;
1583 break;
1584 case XML_CHAR_ENCODING_NONE:
1585 /* let's assume it's UTF-8 without the XML decl */
1586 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1587 return(0);
1588 case XML_CHAR_ENCODING_UTF8:
1589 /* default encoding, no conversion should be needed */
1590 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001591
1592 /*
1593 * Errata on XML-1.0 June 20 2001
1594 * Specific handling of the Byte Order Mark for
1595 * UTF-8
1596 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001597 if ((ctxt->input != NULL) &&
1598 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001599 (ctxt->input->cur[1] == 0xBB) &&
1600 (ctxt->input->cur[2] == 0xBF)) {
1601 ctxt->input->cur += 3;
1602 }
Owen Taylor3473f882001-02-23 17:55:21 +00001603 return(0);
1604 default:
1605 break;
1606 }
1607 handler = xmlGetCharEncodingHandler(enc);
1608 if (handler == NULL) {
1609 /*
1610 * Default handlers.
1611 */
1612 switch (enc) {
1613 case XML_CHAR_ENCODING_ERROR:
1614 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1617 ctxt->wellFormed = 0;
1618 ctxt->disableSAX = 1;
1619 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1620 break;
1621 case XML_CHAR_ENCODING_NONE:
1622 /* let's assume it's UTF-8 without the XML decl */
1623 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1624 return(0);
1625 case XML_CHAR_ENCODING_UTF8:
1626 case XML_CHAR_ENCODING_ASCII:
1627 /* default encoding, no conversion should be needed */
1628 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1629 return(0);
1630 case XML_CHAR_ENCODING_UTF16LE:
1631 break;
1632 case XML_CHAR_ENCODING_UTF16BE:
1633 break;
1634 case XML_CHAR_ENCODING_UCS4LE:
1635 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1637 ctxt->sax->error(ctxt->userData,
1638 "char encoding USC4 little endian not supported\n");
1639 break;
1640 case XML_CHAR_ENCODING_UCS4BE:
1641 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643 ctxt->sax->error(ctxt->userData,
1644 "char encoding USC4 big endian not supported\n");
1645 break;
1646 case XML_CHAR_ENCODING_EBCDIC:
1647 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649 ctxt->sax->error(ctxt->userData,
1650 "char encoding EBCDIC not supported\n");
1651 break;
1652 case XML_CHAR_ENCODING_UCS4_2143:
1653 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "char encoding UCS4 2143 not supported\n");
1657 break;
1658 case XML_CHAR_ENCODING_UCS4_3412:
1659 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1661 ctxt->sax->error(ctxt->userData,
1662 "char encoding UCS4 3412 not supported\n");
1663 break;
1664 case XML_CHAR_ENCODING_UCS2:
1665 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1667 ctxt->sax->error(ctxt->userData,
1668 "char encoding UCS2 not supported\n");
1669 break;
1670 case XML_CHAR_ENCODING_8859_1:
1671 case XML_CHAR_ENCODING_8859_2:
1672 case XML_CHAR_ENCODING_8859_3:
1673 case XML_CHAR_ENCODING_8859_4:
1674 case XML_CHAR_ENCODING_8859_5:
1675 case XML_CHAR_ENCODING_8859_6:
1676 case XML_CHAR_ENCODING_8859_7:
1677 case XML_CHAR_ENCODING_8859_8:
1678 case XML_CHAR_ENCODING_8859_9:
1679 /*
1680 * We used to keep the internal content in the
1681 * document encoding however this turns being unmaintainable
1682 * So xmlGetCharEncodingHandler() will return non-null
1683 * values for this now.
1684 */
1685 if ((ctxt->inputNr == 1) &&
1686 (ctxt->encoding == NULL) &&
1687 (ctxt->input->encoding != NULL)) {
1688 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1689 }
1690 ctxt->charset = enc;
1691 return(0);
1692 case XML_CHAR_ENCODING_2022_JP:
1693 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1695 ctxt->sax->error(ctxt->userData,
1696 "char encoding ISO-2022-JPnot supported\n");
1697 break;
1698 case XML_CHAR_ENCODING_SHIFT_JIS:
1699 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1701 ctxt->sax->error(ctxt->userData,
1702 "char encoding Shift_JIS not supported\n");
1703 break;
1704 case XML_CHAR_ENCODING_EUC_JP:
1705 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1707 ctxt->sax->error(ctxt->userData,
1708 "char encoding EUC-JPnot supported\n");
1709 break;
1710 }
1711 }
1712 if (handler == NULL)
1713 return(-1);
1714 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1715 return(xmlSwitchToEncoding(ctxt, handler));
1716}
1717
1718/**
1719 * xmlSwitchToEncoding:
1720 * @ctxt: the parser context
1721 * @handler: the encoding handler
1722 *
1723 * change the input functions when discovering the character encoding
1724 * of a given entity.
1725 *
1726 * Returns 0 in case of success, -1 otherwise
1727 */
1728int
1729xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1730{
1731 int nbchars;
1732
1733 if (handler != NULL) {
1734 if (ctxt->input != NULL) {
1735 if (ctxt->input->buf != NULL) {
1736 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001737 /*
1738 * Check in case the auto encoding detetection triggered
1739 * in already.
1740 */
Owen Taylor3473f882001-02-23 17:55:21 +00001741 if (ctxt->input->buf->encoder == handler)
1742 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001743
1744 /*
1745 * "UTF-16" can be used for both LE and BE
1746 */
1747 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1748 BAD_CAST "UTF-16", 6)) &&
1749 (!xmlStrncmp(BAD_CAST handler->name,
1750 BAD_CAST "UTF-16", 6))) {
1751 return(0);
1752 }
1753
Owen Taylor3473f882001-02-23 17:55:21 +00001754 /*
1755 * Note: this is a bit dangerous, but that's what it
1756 * takes to use nearly compatible signature for different
1757 * encodings.
1758 */
1759 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1760 ctxt->input->buf->encoder = handler;
1761 return(0);
1762 }
1763 ctxt->input->buf->encoder = handler;
1764
1765 /*
1766 * Is there already some content down the pipe to convert ?
1767 */
1768 if ((ctxt->input->buf->buffer != NULL) &&
1769 (ctxt->input->buf->buffer->use > 0)) {
1770 int processed;
1771
1772 /*
1773 * Specific handling of the Byte Order Mark for
1774 * UTF-16
1775 */
1776 if ((handler->name != NULL) &&
1777 (!strcmp(handler->name, "UTF-16LE")) &&
1778 (ctxt->input->cur[0] == 0xFF) &&
1779 (ctxt->input->cur[1] == 0xFE)) {
1780 ctxt->input->cur += 2;
1781 }
1782 if ((handler->name != NULL) &&
1783 (!strcmp(handler->name, "UTF-16BE")) &&
1784 (ctxt->input->cur[0] == 0xFE) &&
1785 (ctxt->input->cur[1] == 0xFF)) {
1786 ctxt->input->cur += 2;
1787 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001788 /*
1789 * Errata on XML-1.0 June 20 2001
1790 * Specific handling of the Byte Order Mark for
1791 * UTF-8
1792 */
1793 if ((handler->name != NULL) &&
1794 (!strcmp(handler->name, "UTF-8")) &&
1795 (ctxt->input->cur[0] == 0xEF) &&
1796 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001797 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 ctxt->input->cur += 3;
1799 }
Owen Taylor3473f882001-02-23 17:55:21 +00001800
1801 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001802 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001803 * Move it as the raw buffer and create a new input buffer
1804 */
1805 processed = ctxt->input->cur - ctxt->input->base;
1806 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1807 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1808 ctxt->input->buf->buffer = xmlBufferCreate();
1809
1810 if (ctxt->html) {
1811 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001812 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001813 */
1814 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1815 ctxt->input->buf->buffer,
1816 ctxt->input->buf->raw);
1817 } else {
1818 /*
1819 * convert just enough to get
1820 * '<?xml version="1.0" encoding="xxx"?>'
1821 * parsed with the autodetected encoding
1822 * into the parser reading buffer.
1823 */
1824 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1825 ctxt->input->buf->buffer,
1826 ctxt->input->buf->raw);
1827 }
1828 if (nbchars < 0) {
1829 xmlGenericError(xmlGenericErrorContext,
1830 "xmlSwitchToEncoding: encoder error\n");
1831 return(-1);
1832 }
1833 ctxt->input->base =
1834 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001835 ctxt->input->end =
1836 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001837
1838 }
1839 return(0);
1840 } else {
1841 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1842 /*
1843 * When parsing a static memory array one must know the
1844 * size to be able to convert the buffer.
1845 */
1846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1847 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001848 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001849 return(-1);
1850 } else {
1851 int processed;
1852
1853 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001854 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001855 * Move it as the raw buffer and create a new input buffer
1856 */
1857 processed = ctxt->input->cur - ctxt->input->base;
1858
1859 ctxt->input->buf->raw = xmlBufferCreate();
1860 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1861 ctxt->input->length - processed);
1862 ctxt->input->buf->buffer = xmlBufferCreate();
1863
1864 /*
1865 * convert as much as possible of the raw input
1866 * to the parser reading buffer.
1867 */
1868 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1869 ctxt->input->buf->buffer,
1870 ctxt->input->buf->raw);
1871 if (nbchars < 0) {
1872 xmlGenericError(xmlGenericErrorContext,
1873 "xmlSwitchToEncoding: encoder error\n");
1874 return(-1);
1875 }
1876
1877 /*
1878 * Conversion succeeded, get rid of the old buffer
1879 */
1880 if ((ctxt->input->free != NULL) &&
1881 (ctxt->input->base != NULL))
1882 ctxt->input->free((xmlChar *) ctxt->input->base);
1883 ctxt->input->base =
1884 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885 ctxt->input->end =
1886 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001887 }
1888 }
1889 } else {
1890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1891 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001892 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001893 return(-1);
1894 }
1895 /*
1896 * The parsing is now done in UTF8 natively
1897 */
1898 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1899 } else
1900 return(-1);
1901 return(0);
1902
1903}
1904
1905/************************************************************************
1906 * *
1907 * Commodity functions to handle entities processing *
1908 * *
1909 ************************************************************************/
1910
1911/**
1912 * xmlFreeInputStream:
1913 * @input: an xmlParserInputPtr
1914 *
1915 * Free up an input stream.
1916 */
1917void
1918xmlFreeInputStream(xmlParserInputPtr input) {
1919 if (input == NULL) return;
1920
1921 if (input->filename != NULL) xmlFree((char *) input->filename);
1922 if (input->directory != NULL) xmlFree((char *) input->directory);
1923 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1924 if (input->version != NULL) xmlFree((char *) input->version);
1925 if ((input->free != NULL) && (input->base != NULL))
1926 input->free((xmlChar *) input->base);
1927 if (input->buf != NULL)
1928 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001929 xmlFree(input);
1930}
1931
1932/**
1933 * xmlNewInputStream:
1934 * @ctxt: an XML parser context
1935 *
1936 * Create a new input stream structure
1937 * Returns the new input stream or NULL
1938 */
1939xmlParserInputPtr
1940xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1941 xmlParserInputPtr input;
1942
1943 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1944 if (input == NULL) {
1945 if (ctxt != NULL) {
1946 ctxt->errNo = XML_ERR_NO_MEMORY;
1947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1948 ctxt->sax->error(ctxt->userData,
1949 "malloc: couldn't allocate a new input stream\n");
1950 ctxt->errNo = XML_ERR_NO_MEMORY;
1951 }
1952 return(NULL);
1953 }
1954 memset(input, 0, sizeof(xmlParserInput));
1955 input->line = 1;
1956 input->col = 1;
1957 input->standalone = -1;
1958 return(input);
1959}
1960
1961/**
1962 * xmlNewIOInputStream:
1963 * @ctxt: an XML parser context
1964 * @input: an I/O Input
1965 * @enc: the charset encoding if known
1966 *
1967 * Create a new input stream structure encapsulating the @input into
1968 * a stream suitable for the parser.
1969 *
1970 * Returns the new input stream or NULL
1971 */
1972xmlParserInputPtr
1973xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1974 xmlCharEncoding enc) {
1975 xmlParserInputPtr inputStream;
1976
1977 if (xmlParserDebugEntities)
1978 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1979 inputStream = xmlNewInputStream(ctxt);
1980 if (inputStream == NULL) {
1981 return(NULL);
1982 }
1983 inputStream->filename = NULL;
1984 inputStream->buf = input;
1985 inputStream->base = inputStream->buf->buffer->content;
1986 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001987 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001988 if (enc != XML_CHAR_ENCODING_NONE) {
1989 xmlSwitchEncoding(ctxt, enc);
1990 }
1991
1992 return(inputStream);
1993}
1994
1995/**
1996 * xmlNewEntityInputStream:
1997 * @ctxt: an XML parser context
1998 * @entity: an Entity pointer
1999 *
2000 * Create a new input stream based on an xmlEntityPtr
2001 *
2002 * Returns the new input stream or NULL
2003 */
2004xmlParserInputPtr
2005xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2006 xmlParserInputPtr input;
2007
2008 if (entity == NULL) {
2009 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2011 ctxt->sax->error(ctxt->userData,
2012 "internal: xmlNewEntityInputStream entity = NULL\n");
2013 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2014 return(NULL);
2015 }
2016 if (xmlParserDebugEntities)
2017 xmlGenericError(xmlGenericErrorContext,
2018 "new input from entity: %s\n", entity->name);
2019 if (entity->content == NULL) {
2020 switch (entity->etype) {
2021 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2022 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData,
2025 "xmlNewEntityInputStream unparsed entity !\n");
2026 break;
2027 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2028 case XML_EXTERNAL_PARAMETER_ENTITY:
2029 return(xmlLoadExternalEntity((char *) entity->URI,
2030 (char *) entity->ExternalID, ctxt));
2031 case XML_INTERNAL_GENERAL_ENTITY:
2032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2033 ctxt->sax->error(ctxt->userData,
2034 "Internal entity %s without content !\n", entity->name);
2035 break;
2036 case XML_INTERNAL_PARAMETER_ENTITY:
2037 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2039 ctxt->sax->error(ctxt->userData,
2040 "Internal parameter entity %s without content !\n", entity->name);
2041 break;
2042 case XML_INTERNAL_PREDEFINED_ENTITY:
2043 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2045 ctxt->sax->error(ctxt->userData,
2046 "Predefined entity %s without content !\n", entity->name);
2047 break;
2048 }
2049 return(NULL);
2050 }
2051 input = xmlNewInputStream(ctxt);
2052 if (input == NULL) {
2053 return(NULL);
2054 }
2055 input->filename = (char *) entity->URI;
2056 input->base = entity->content;
2057 input->cur = entity->content;
2058 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002059 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002060 return(input);
2061}
2062
2063/**
2064 * xmlNewStringInputStream:
2065 * @ctxt: an XML parser context
2066 * @buffer: an memory buffer
2067 *
2068 * Create a new input stream based on a memory buffer.
2069 * Returns the new input stream
2070 */
2071xmlParserInputPtr
2072xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2073 xmlParserInputPtr input;
2074
2075 if (buffer == NULL) {
2076 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2078 ctxt->sax->error(ctxt->userData,
2079 "internal: xmlNewStringInputStream string = NULL\n");
2080 return(NULL);
2081 }
2082 if (xmlParserDebugEntities)
2083 xmlGenericError(xmlGenericErrorContext,
2084 "new fixed input: %.30s\n", buffer);
2085 input = xmlNewInputStream(ctxt);
2086 if (input == NULL) {
2087 return(NULL);
2088 }
2089 input->base = buffer;
2090 input->cur = buffer;
2091 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002092 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002093 return(input);
2094}
2095
2096/**
2097 * xmlNewInputFromFile:
2098 * @ctxt: an XML parser context
2099 * @filename: the filename to use as entity
2100 *
2101 * Create a new input stream based on a file.
2102 *
2103 * Returns the new input stream or NULL in case of error
2104 */
2105xmlParserInputPtr
2106xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2107 xmlParserInputBufferPtr buf;
2108 xmlParserInputPtr inputStream;
2109 char *directory = NULL;
2110 xmlChar *URI = NULL;
2111
2112 if (xmlParserDebugEntities)
2113 xmlGenericError(xmlGenericErrorContext,
2114 "new input from file: %s\n", filename);
2115 if (ctxt == NULL) return(NULL);
2116 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2117 if (buf == NULL)
2118 return(NULL);
2119
2120 URI = xmlStrdup((xmlChar *) filename);
2121 directory = xmlParserGetDirectory((const char *) URI);
2122
2123 inputStream = xmlNewInputStream(ctxt);
2124 if (inputStream == NULL) {
2125 if (directory != NULL) xmlFree((char *) directory);
2126 if (URI != NULL) xmlFree((char *) URI);
2127 return(NULL);
2128 }
2129
2130 inputStream->filename = (const char *) URI;
2131 inputStream->directory = directory;
2132 inputStream->buf = buf;
2133
2134 inputStream->base = inputStream->buf->buffer->content;
2135 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002136 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002137 if ((ctxt->directory == NULL) && (directory != NULL))
2138 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2139 return(inputStream);
2140}
2141
2142/************************************************************************
2143 * *
2144 * Commodity functions to handle parser contexts *
2145 * *
2146 ************************************************************************/
2147
2148/**
2149 * xmlInitParserCtxt:
2150 * @ctxt: an XML parser context
2151 *
2152 * Initialize a parser context
2153 */
2154
2155void
2156xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2157{
2158 xmlSAXHandler *sax;
2159
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002160 if(ctxt==NULL) {
2161 xmlGenericError(xmlGenericErrorContext,
2162 "xmlInitParserCtxt: NULL context given\n");
2163 return;
2164 }
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 xmlDefaultSAXHandlerInit();
2167
2168 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2169 if (sax == NULL) {
2170 xmlGenericError(xmlGenericErrorContext,
2171 "xmlInitParserCtxt: out of memory\n");
2172 }
2173 else
2174 memset(sax, 0, sizeof(xmlSAXHandler));
2175
2176 /* Allocate the Input stack */
2177 ctxt->inputTab = (xmlParserInputPtr *)
2178 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2179 if (ctxt->inputTab == NULL) {
2180 xmlGenericError(xmlGenericErrorContext,
2181 "xmlInitParserCtxt: out of memory\n");
2182 ctxt->inputNr = 0;
2183 ctxt->inputMax = 0;
2184 ctxt->input = NULL;
2185 return;
2186 }
2187 ctxt->inputNr = 0;
2188 ctxt->inputMax = 5;
2189 ctxt->input = NULL;
2190
2191 ctxt->version = NULL;
2192 ctxt->encoding = NULL;
2193 ctxt->standalone = -1;
2194 ctxt->hasExternalSubset = 0;
2195 ctxt->hasPErefs = 0;
2196 ctxt->html = 0;
2197 ctxt->external = 0;
2198 ctxt->instate = XML_PARSER_START;
2199 ctxt->token = 0;
2200 ctxt->directory = NULL;
2201
2202 /* Allocate the Node stack */
2203 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2204 if (ctxt->nodeTab == NULL) {
2205 xmlGenericError(xmlGenericErrorContext,
2206 "xmlInitParserCtxt: out of memory\n");
2207 ctxt->nodeNr = 0;
2208 ctxt->nodeMax = 0;
2209 ctxt->node = NULL;
2210 ctxt->inputNr = 0;
2211 ctxt->inputMax = 0;
2212 ctxt->input = NULL;
2213 return;
2214 }
2215 ctxt->nodeNr = 0;
2216 ctxt->nodeMax = 10;
2217 ctxt->node = NULL;
2218
2219 /* Allocate the Name stack */
2220 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2221 if (ctxt->nameTab == NULL) {
2222 xmlGenericError(xmlGenericErrorContext,
2223 "xmlInitParserCtxt: out of memory\n");
2224 ctxt->nodeNr = 0;
2225 ctxt->nodeMax = 0;
2226 ctxt->node = NULL;
2227 ctxt->inputNr = 0;
2228 ctxt->inputMax = 0;
2229 ctxt->input = NULL;
2230 ctxt->nameNr = 0;
2231 ctxt->nameMax = 0;
2232 ctxt->name = NULL;
2233 return;
2234 }
2235 ctxt->nameNr = 0;
2236 ctxt->nameMax = 10;
2237 ctxt->name = NULL;
2238
2239 /* Allocate the space stack */
2240 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2241 if (ctxt->spaceTab == NULL) {
2242 xmlGenericError(xmlGenericErrorContext,
2243 "xmlInitParserCtxt: out of memory\n");
2244 ctxt->nodeNr = 0;
2245 ctxt->nodeMax = 0;
2246 ctxt->node = NULL;
2247 ctxt->inputNr = 0;
2248 ctxt->inputMax = 0;
2249 ctxt->input = NULL;
2250 ctxt->nameNr = 0;
2251 ctxt->nameMax = 0;
2252 ctxt->name = NULL;
2253 ctxt->spaceNr = 0;
2254 ctxt->spaceMax = 0;
2255 ctxt->space = NULL;
2256 return;
2257 }
2258 ctxt->spaceNr = 1;
2259 ctxt->spaceMax = 10;
2260 ctxt->spaceTab[0] = -1;
2261 ctxt->space = &ctxt->spaceTab[0];
2262
Daniel Veillard14be0a12001-03-03 18:50:55 +00002263 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002264 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002265
Owen Taylor3473f882001-02-23 17:55:21 +00002266 ctxt->userData = ctxt;
2267 ctxt->myDoc = NULL;
2268 ctxt->wellFormed = 1;
2269 ctxt->valid = 1;
2270 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2271 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2272 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002273 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002274 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002275 if (ctxt->keepBlanks == 0)
2276 sax->ignorableWhitespace = ignorableWhitespace;
2277
Owen Taylor3473f882001-02-23 17:55:21 +00002278 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002279 ctxt->vctxt.error = xmlParserValidityError;
2280 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 if (xmlGetWarningsDefaultValue == 0)
2283 ctxt->vctxt.warning = NULL;
2284 else
2285 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002287 }
2288 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2289 ctxt->record_info = 0;
2290 ctxt->nbChars = 0;
2291 ctxt->checkIndex = 0;
2292 ctxt->inSubset = 0;
2293 ctxt->errNo = XML_ERR_OK;
2294 ctxt->depth = 0;
2295 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002296 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 xmlInitNodeInfoSeq(&ctxt->node_seq);
2298}
2299
2300/**
2301 * xmlFreeParserCtxt:
2302 * @ctxt: an XML parser context
2303 *
2304 * Free all the memory used by a parser context. However the parsed
2305 * document in ctxt->myDoc is not freed.
2306 */
2307
2308void
2309xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2310{
2311 xmlParserInputPtr input;
2312 xmlChar *oldname;
2313
2314 if (ctxt == NULL) return;
2315
2316 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2317 xmlFreeInputStream(input);
2318 }
2319 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2320 xmlFree(oldname);
2321 }
2322 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2323 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2324 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2325 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2326 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2327 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2328 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2329 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2330 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002331 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2332 xmlFree(ctxt->sax);
2333 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002334 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002335#ifdef LIBXML_CATALOG_ENABLED
2336 if (ctxt->catalogs != NULL)
2337 xmlCatalogFreeLocal(ctxt->catalogs);
2338#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002339 xmlFree(ctxt);
2340}
2341
2342/**
2343 * xmlNewParserCtxt:
2344 *
2345 * Allocate and initialize a new parser context.
2346 *
2347 * Returns the xmlParserCtxtPtr or NULL
2348 */
2349
2350xmlParserCtxtPtr
2351xmlNewParserCtxt()
2352{
2353 xmlParserCtxtPtr ctxt;
2354
2355 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2356 if (ctxt == NULL) {
2357 xmlGenericError(xmlGenericErrorContext,
2358 "xmlNewParserCtxt : cannot allocate context\n");
2359 perror("malloc");
2360 return(NULL);
2361 }
2362 memset(ctxt, 0, sizeof(xmlParserCtxt));
2363 xmlInitParserCtxt(ctxt);
2364 return(ctxt);
2365}
2366
/************************************************************************
 *                                                                      *
 *              Handling of node information                            *
 *                                                                      *
 ************************************************************************/
2372
2373/**
2374 * xmlClearParserCtxt:
2375 * @ctxt: an XML parser context
2376 *
2377 * Clear (release owned resources) and reinitialize a parser context
2378 */
2379
2380void
2381xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2382{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002383 if (ctxt==NULL)
2384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002385 xmlClearNodeInfoSeq(&ctxt->node_seq);
2386 xmlInitParserCtxt(ctxt);
2387}
2388
2389/**
2390 * xmlParserFindNodeInfo:
2391 * @ctxt: an XML parser context
2392 * @node: an XML node within the tree
2393 *
2394 * Find the parser node info struct for a given node
2395 *
2396 * Returns an xmlParserNodeInfo block pointer or NULL
2397 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002398const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2399 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002400{
2401 unsigned long pos;
2402
2403 /* Find position where node should be at */
2404 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002405 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002406 return &ctx->node_seq.buffer[pos];
2407 else
2408 return NULL;
2409}
2410
2411
2412/**
2413 * xmlInitNodeInfoSeq:
2414 * @seq: a node info sequence pointer
2415 *
2416 * -- Initialize (set to initial state) node info sequence
2417 */
2418void
2419xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2420{
2421 seq->length = 0;
2422 seq->maximum = 0;
2423 seq->buffer = NULL;
2424}
2425
2426/**
2427 * xmlClearNodeInfoSeq:
2428 * @seq: a node info sequence pointer
2429 *
2430 * -- Clear (release memory and reinitialize) node
2431 * info sequence
2432 */
2433void
2434xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2435{
2436 if ( seq->buffer != NULL )
2437 xmlFree(seq->buffer);
2438 xmlInitNodeInfoSeq(seq);
2439}
2440
2441
2442/**
2443 * xmlParserFindNodeInfoIndex:
2444 * @seq: a node info sequence pointer
2445 * @node: an XML node pointer
2446 *
2447 *
2448 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2449 * the given node is or should be at in a sorted sequence
2450 *
2451 * Returns a long indicating the position of the record
2452 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002453unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2454 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002455{
2456 unsigned long upper, lower, middle;
2457 int found = 0;
2458
2459 /* Do a binary search for the key */
2460 lower = 1;
2461 upper = seq->length;
2462 middle = 0;
2463 while ( lower <= upper && !found) {
2464 middle = lower + (upper - lower) / 2;
2465 if ( node == seq->buffer[middle - 1].node )
2466 found = 1;
2467 else if ( node < seq->buffer[middle - 1].node )
2468 upper = middle - 1;
2469 else
2470 lower = middle + 1;
2471 }
2472
2473 /* Return position */
2474 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2475 return middle;
2476 else
2477 return middle - 1;
2478}
2479
2480
2481/**
2482 * xmlParserAddNodeInfo:
2483 * @ctxt: an XML parser context
2484 * @info: a node info sequence pointer
2485 *
2486 * Insert node info record into the sorted sequence
2487 */
2488void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002489xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002490 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002491{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002492 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002493
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002494 /* Find pos and check to see if node is already in the sequence */
2495 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2496 info->node);
2497 if (pos < ctxt->node_seq.length
2498 && ctxt->node_seq.buffer[pos].node == info->node) {
2499 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002500 }
2501
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002502 /* Otherwise, we need to add new node to buffer */
2503 else {
2504 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2505 xmlParserNodeInfo *tmp_buffer;
2506 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002507
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002508 if (ctxt->node_seq.maximum == 0)
2509 ctxt->node_seq.maximum = 2;
2510 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2511 (2 * ctxt->node_seq.maximum));
2512
2513 if (ctxt->node_seq.buffer == NULL)
2514 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2515 else
2516 tmp_buffer =
2517 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2518 byte_size);
2519
2520 if (tmp_buffer == NULL) {
2521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2522 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2523 ctxt->errNo = XML_ERR_NO_MEMORY;
2524 return;
2525 }
2526 ctxt->node_seq.buffer = tmp_buffer;
2527 ctxt->node_seq.maximum *= 2;
2528 }
2529
2530 /* If position is not at end, move elements out of the way */
2531 if (pos != ctxt->node_seq.length) {
2532 unsigned long i;
2533
2534 for (i = ctxt->node_seq.length; i > pos; i--)
2535 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2536 }
2537
2538 /* Copy element and increase length */
2539 ctxt->node_seq.buffer[pos] = *info;
2540 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002541 }
Owen Taylor3473f882001-02-23 17:55:21 +00002542}
2543
2544/************************************************************************
2545 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002546 * Defaults settings *
2547 * *
2548 ************************************************************************/
2549/**
2550 * xmlPedanticParserDefault:
2551 * @val: int 0 or 1
2552 *
2553 * Set and return the previous value for enabling pedantic warnings.
2554 *
2555 * Returns the last value for 0 for no substitution, 1 for substitution.
2556 */
2557
2558int
2559xmlPedanticParserDefault(int val) {
2560 int old = xmlPedanticParserDefaultValue;
2561
2562 xmlPedanticParserDefaultValue = val;
2563 return(old);
2564}
2565
2566/**
2567 * xmlLineNumbersDefault:
2568 * @val: int 0 or 1
2569 *
2570 * Set and return the previous value for enabling line numbers in elements
2571 * contents. This may break on old application and is turned off by default.
2572 *
2573 * Returns the last value for 0 for no substitution, 1 for substitution.
2574 */
2575
2576int
2577xmlLineNumbersDefault(int val) {
2578 int old = xmlLineNumbersDefaultValue;
2579
2580 xmlLineNumbersDefaultValue = val;
2581 return(old);
2582}
2583
2584/**
2585 * xmlSubstituteEntitiesDefault:
2586 * @val: int 0 or 1
2587 *
2588 * Set and return the previous value for default entity support.
2589 * Initially the parser always keep entity references instead of substituting
2590 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002591 * default parser behavior
2592 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002593 * file basis.
2594 *
2595 * Returns the last value for 0 for no substitution, 1 for substitution.
2596 */
2597
2598int
2599xmlSubstituteEntitiesDefault(int val) {
2600 int old = xmlSubstituteEntitiesDefaultValue;
2601
2602 xmlSubstituteEntitiesDefaultValue = val;
2603 return(old);
2604}
2605
2606/**
2607 * xmlKeepBlanksDefault:
2608 * @val: int 0 or 1
2609 *
2610 * Set and return the previous value for default blanks text nodes support.
2611 * The 1.x version of the parser used an heuristic to try to detect
2612 * ignorable white spaces. As a result the SAX callback was generating
2613 * ignorableWhitespace() callbacks instead of characters() one, and when
2614 * using the DOM output text nodes containing those blanks were not generated.
2615 * The 2.x and later version will switch to the XML standard way and
2616 * ignorableWhitespace() are only generated when running the parser in
2617 * validating mode and when the current element doesn't allow CDATA or
2618 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002619 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002620 * on 1.X libs and to switch back to the old mode for compatibility when
2621 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2622 * by using xmlIsBlankNode() commodity function to detect the "empty"
2623 * nodes generated.
2624 * This value also affect autogeneration of indentation when saving code
2625 * if blanks sections are kept, indentation is not generated.
2626 *
2627 * Returns the last value for 0 for no substitution, 1 for substitution.
2628 */
2629
2630int
2631xmlKeepBlanksDefault(int val) {
2632 int old = xmlKeepBlanksDefaultValue;
2633
2634 xmlKeepBlanksDefaultValue = val;
2635 xmlIndentTreeOutput = !val;
2636 return(old);
2637}
2638
2639/************************************************************************
2640 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002641 * Deprecated functions kept for compatibility *
2642 * *
2643 ************************************************************************/
2644
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002645/**
2646 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002647 * @lang: pointer to the string value
2648 *
2649 * Checks that the value conforms to the LanguageID production:
2650 *
2651 * NOTE: this is somewhat deprecated, those productions were removed from
2652 * the XML Second edition.
2653 *
2654 * [33] LanguageID ::= Langcode ('-' Subcode)*
2655 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2656 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2657 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2658 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2659 * [38] Subcode ::= ([a-z] | [A-Z])+
2660 *
2661 * Returns 1 if correct 0 otherwise
2662 **/
2663int
2664xmlCheckLanguageID(const xmlChar *lang) {
2665 const xmlChar *cur = lang;
2666
2667 if (cur == NULL)
2668 return(0);
2669 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2670 ((cur[0] == 'I') && (cur[1] == '-'))) {
2671 /*
2672 * IANA code
2673 */
2674 cur += 2;
2675 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2676 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2677 cur++;
2678 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2679 ((cur[0] == 'X') && (cur[1] == '-'))) {
2680 /*
2681 * User code
2682 */
2683 cur += 2;
2684 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2685 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2686 cur++;
2687 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2688 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2689 /*
2690 * ISO639
2691 */
2692 cur++;
2693 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2694 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2695 cur++;
2696 else
2697 return(0);
2698 } else
2699 return(0);
2700 while (cur[0] != 0) { /* non input consuming */
2701 if (cur[0] != '-')
2702 return(0);
2703 cur++;
2704 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2705 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2706 cur++;
2707 else
2708 return(0);
2709 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2710 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2711 cur++;
2712 }
2713 return(1);
2714}
2715
2716/**
2717 * xmlDecodeEntities:
2718 * @ctxt: the parser context
2719 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2720 * @len: the len to decode (in bytes !), -1 for no size limit
2721 * @end: an end marker xmlChar, 0 if none
2722 * @end2: an end marker xmlChar, 0 if none
2723 * @end3: an end marker xmlChar, 0 if none
2724 *
2725 * This function is deprecated, we now always process entities content
2726 * through xmlStringDecodeEntities
2727 *
2728 * TODO: remove it in next major release.
2729 *
2730 * [67] Reference ::= EntityRef | CharRef
2731 *
2732 * [69] PEReference ::= '%' Name ';'
2733 *
2734 * Returns A newly allocated string with the substitution done. The caller
2735 * must deallocate it !
2736 */
2737xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002738xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2739 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002740#if 0
2741 xmlChar *buffer = NULL;
2742 unsigned int buffer_size = 0;
2743 unsigned int nbchars = 0;
2744
2745 xmlChar *current = NULL;
2746 xmlEntityPtr ent;
2747 unsigned int max = (unsigned int) len;
2748 int c,l;
2749#endif
2750
2751 static int deprecated = 0;
2752 if (!deprecated) {
2753 xmlGenericError(xmlGenericErrorContext,
2754 "xmlDecodeEntities() deprecated function reached\n");
2755 deprecated = 1;
2756 }
2757
2758#if 0
2759 if (ctxt->depth > 40) {
2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2761 ctxt->sax->error(ctxt->userData,
2762 "Detected entity reference loop\n");
2763 ctxt->wellFormed = 0;
2764 ctxt->disableSAX = 1;
2765 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2766 return(NULL);
2767 }
2768
2769 /*
2770 * allocate a translation buffer.
2771 */
2772 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2773 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2774 if (buffer == NULL) {
2775 perror("xmlDecodeEntities: malloc failed");
2776 return(NULL);
2777 }
2778
2779 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002780 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002781 */
2782 GROW;
2783 c = CUR_CHAR(l);
2784 while ((nbchars < max) && (c != end) && /* NOTUSED */
2785 (c != end2) && (c != end3)) {
2786 GROW;
2787 if (c == 0) break;
2788 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2789 int val = xmlParseCharRef(ctxt);
2790 COPY_BUF(0,buffer,nbchars,val);
2791 NEXTL(l);
2792 } else if ((c == '&') && (ctxt->token != '&') &&
2793 (what & XML_SUBSTITUTE_REF)) {
2794 if (xmlParserDebugEntities)
2795 xmlGenericError(xmlGenericErrorContext,
2796 "decoding Entity Reference\n");
2797 ent = xmlParseEntityRef(ctxt);
2798 if ((ent != NULL) &&
2799 (ctxt->replaceEntities != 0)) {
2800 current = ent->content;
2801 while (*current != 0) { /* non input consuming loop */
2802 buffer[nbchars++] = *current++;
2803 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2804 growBuffer(buffer);
2805 }
2806 }
2807 } else if (ent != NULL) {
2808 const xmlChar *cur = ent->name;
2809
2810 buffer[nbchars++] = '&';
2811 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2812 growBuffer(buffer);
2813 }
2814 while (*cur != 0) { /* non input consuming loop */
2815 buffer[nbchars++] = *cur++;
2816 }
2817 buffer[nbchars++] = ';';
2818 }
2819 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2820 /*
2821 * a PEReference induce to switch the entity flow,
2822 * we break here to flush the current set of chars
2823 * parsed if any. We will be called back later.
2824 */
2825 if (xmlParserDebugEntities)
2826 xmlGenericError(xmlGenericErrorContext,
2827 "decoding PE Reference\n");
2828 if (nbchars != 0) break;
2829
2830 xmlParsePEReference(ctxt);
2831
2832 /*
2833 * Pop-up of finished entities.
2834 */
2835 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2836 xmlPopInput(ctxt);
2837
2838 break;
2839 } else {
2840 COPY_BUF(l,buffer,nbchars,c);
2841 NEXTL(l);
2842 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2843 growBuffer(buffer);
2844 }
2845 }
2846 c = CUR_CHAR(l);
2847 }
2848 buffer[nbchars++] = 0;
2849 return(buffer);
2850#endif
2851 return(NULL);
2852}
2853
2854/**
2855 * xmlNamespaceParseNCName:
2856 * @ctxt: an XML parser context
2857 *
2858 * parse an XML namespace name.
2859 *
2860 * TODO: this seems not in use anymore, the namespace handling is done on
2861 * top of the SAX interfaces, i.e. not on raw input.
2862 *
2863 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2864 *
2865 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2866 * CombiningChar | Extender
2867 *
2868 * Returns the namespace name or NULL
2869 */
2870
2871xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002872xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002873#if 0
2874 xmlChar buf[XML_MAX_NAMELEN + 5];
2875 int len = 0, l;
2876 int cur = CUR_CHAR(l);
2877#endif
2878
2879 static int deprecated = 0;
2880 if (!deprecated) {
2881 xmlGenericError(xmlGenericErrorContext,
2882 "xmlNamespaceParseNCName() deprecated function reached\n");
2883 deprecated = 1;
2884 }
2885
2886#if 0
2887 /* load first the value of the char !!! */
2888 GROW;
2889 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2890
2891xmlGenericError(xmlGenericErrorContext,
2892 "xmlNamespaceParseNCName: reached loop 3\n");
2893 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2894 (cur == '.') || (cur == '-') ||
2895 (cur == '_') ||
2896 (IS_COMBINING(cur)) ||
2897 (IS_EXTENDER(cur))) {
2898 COPY_BUF(l,buf,len,cur);
2899 NEXTL(l);
2900 cur = CUR_CHAR(l);
2901 if (len >= XML_MAX_NAMELEN) {
2902 xmlGenericError(xmlGenericErrorContext,
2903 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2904 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2905 (cur == '.') || (cur == '-') ||
2906 (cur == '_') ||
2907 (IS_COMBINING(cur)) ||
2908 (IS_EXTENDER(cur))) {
2909 NEXTL(l);
2910 cur = CUR_CHAR(l);
2911 }
2912 break;
2913 }
2914 }
2915 return(xmlStrndup(buf, len));
2916#endif
2917 return(NULL);
2918}
2919
2920/**
2921 * xmlNamespaceParseQName:
2922 * @ctxt: an XML parser context
2923 * @prefix: a xmlChar **
2924 *
2925 * TODO: this seems not in use anymore, the namespace handling is done on
2926 * top of the SAX interfaces, i.e. not on raw input.
2927 *
2928 * parse an XML qualified name
2929 *
2930 * [NS 5] QName ::= (Prefix ':')? LocalPart
2931 *
2932 * [NS 6] Prefix ::= NCName
2933 *
2934 * [NS 7] LocalPart ::= NCName
2935 *
2936 * Returns the local part, and prefix is updated
2937 * to get the Prefix if any.
2938 */
2939
2940xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002941xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002942
2943 static int deprecated = 0;
2944 if (!deprecated) {
2945 xmlGenericError(xmlGenericErrorContext,
2946 "xmlNamespaceParseQName() deprecated function reached\n");
2947 deprecated = 1;
2948 }
2949
2950#if 0
2951 xmlChar *ret = NULL;
2952
2953 *prefix = NULL;
2954 ret = xmlNamespaceParseNCName(ctxt);
2955 if (RAW == ':') {
2956 *prefix = ret;
2957 NEXT;
2958 ret = xmlNamespaceParseNCName(ctxt);
2959 }
2960
2961 return(ret);
2962#endif
2963 return(NULL);
2964}
2965
2966/**
2967 * xmlNamespaceParseNSDef:
2968 * @ctxt: an XML parser context
2969 *
2970 * parse a namespace prefix declaration
2971 *
2972 * TODO: this seems not in use anymore, the namespace handling is done on
2973 * top of the SAX interfaces, i.e. not on raw input.
2974 *
2975 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2976 *
2977 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2978 *
2979 * Returns the namespace name
2980 */
2981
2982xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002983xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002984 static int deprecated = 0;
2985 if (!deprecated) {
2986 xmlGenericError(xmlGenericErrorContext,
2987 "xmlNamespaceParseNSDef() deprecated function reached\n");
2988 deprecated = 1;
2989 }
2990 return(NULL);
2991#if 0
2992 xmlChar *name = NULL;
2993
2994 if ((RAW == 'x') && (NXT(1) == 'm') &&
2995 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2996 (NXT(4) == 's')) {
2997 SKIP(5);
2998 if (RAW == ':') {
2999 NEXT;
3000 name = xmlNamespaceParseNCName(ctxt);
3001 }
3002 }
3003 return(name);
3004#endif
3005}
3006
3007/**
3008 * xmlParseQuotedString:
3009 * @ctxt: an XML parser context
3010 *
3011 * Parse and return a string between quotes or doublequotes
3012 *
3013 * TODO: Deprecated, to be removed at next drop of binary compatibility
3014 *
3015 * Returns the string parser or NULL.
3016 */
3017xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003018xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003019 static int deprecated = 0;
3020 if (!deprecated) {
3021 xmlGenericError(xmlGenericErrorContext,
3022 "xmlParseQuotedString() deprecated function reached\n");
3023 deprecated = 1;
3024 }
3025 return(NULL);
3026
3027#if 0
3028 xmlChar *buf = NULL;
3029 int len = 0,l;
3030 int size = XML_PARSER_BUFFER_SIZE;
3031 int c;
3032
3033 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3034 if (buf == NULL) {
3035 xmlGenericError(xmlGenericErrorContext,
3036 "malloc of %d byte failed\n", size);
3037 return(NULL);
3038 }
3039xmlGenericError(xmlGenericErrorContext,
3040 "xmlParseQuotedString: reached loop 4\n");
3041 if (RAW == '"') {
3042 NEXT;
3043 c = CUR_CHAR(l);
3044 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3045 if (len + 5 >= size) {
3046 size *= 2;
3047 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3048 if (buf == NULL) {
3049 xmlGenericError(xmlGenericErrorContext,
3050 "realloc of %d byte failed\n", size);
3051 return(NULL);
3052 }
3053 }
3054 COPY_BUF(l,buf,len,c);
3055 NEXTL(l);
3056 c = CUR_CHAR(l);
3057 }
3058 if (c != '"') {
3059 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3061 ctxt->sax->error(ctxt->userData,
3062 "String not closed \"%.50s\"\n", buf);
3063 ctxt->wellFormed = 0;
3064 ctxt->disableSAX = 1;
3065 } else {
3066 NEXT;
3067 }
3068 } else if (RAW == '\''){
3069 NEXT;
3070 c = CUR;
3071 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3072 if (len + 1 >= size) {
3073 size *= 2;
3074 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3075 if (buf == NULL) {
3076 xmlGenericError(xmlGenericErrorContext,
3077 "realloc of %d byte failed\n", size);
3078 return(NULL);
3079 }
3080 }
3081 buf[len++] = c;
3082 NEXT;
3083 c = CUR;
3084 }
3085 if (RAW != '\'') {
3086 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3088 ctxt->sax->error(ctxt->userData,
3089 "String not closed \"%.50s\"\n", buf);
3090 ctxt->wellFormed = 0;
3091 ctxt->disableSAX = 1;
3092 } else {
3093 NEXT;
3094 }
3095 }
3096 return(buf);
3097#endif
3098}
3099
3100/**
3101 * xmlParseNamespace:
3102 * @ctxt: an XML parser context
3103 *
3104 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3105 *
3106 * This is what the older xml-name Working Draft specified, a bunch of
3107 * other stuff may still rely on it, so support is still here as
3108 * if it was declared on the root of the Tree:-(
3109 *
3110 * TODO: remove from library
3111 *
3112 * To be removed at next drop of binary compatibility
3113 */
3114
3115void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003116xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003117 static int deprecated = 0;
3118 if (!deprecated) {
3119 xmlGenericError(xmlGenericErrorContext,
3120 "xmlParseNamespace() deprecated function reached\n");
3121 deprecated = 1;
3122 }
3123
3124#if 0
3125 xmlChar *href = NULL;
3126 xmlChar *prefix = NULL;
3127 int garbage = 0;
3128
3129 /*
3130 * We just skipped "namespace" or "xml:namespace"
3131 */
3132 SKIP_BLANKS;
3133
3134xmlGenericError(xmlGenericErrorContext,
3135 "xmlParseNamespace: reached loop 5\n");
3136 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3137 /*
3138 * We can have "ns" or "prefix" attributes
3139 * Old encoding as 'href' or 'AS' attributes is still supported
3140 */
3141 if ((RAW == 'n') && (NXT(1) == 's')) {
3142 garbage = 0;
3143 SKIP(2);
3144 SKIP_BLANKS;
3145
3146 if (RAW != '=') continue;
3147 NEXT;
3148 SKIP_BLANKS;
3149
3150 href = xmlParseQuotedString(ctxt);
3151 SKIP_BLANKS;
3152 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3153 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3154 garbage = 0;
3155 SKIP(4);
3156 SKIP_BLANKS;
3157
3158 if (RAW != '=') continue;
3159 NEXT;
3160 SKIP_BLANKS;
3161
3162 href = xmlParseQuotedString(ctxt);
3163 SKIP_BLANKS;
3164 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3165 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3166 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3167 garbage = 0;
3168 SKIP(6);
3169 SKIP_BLANKS;
3170
3171 if (RAW != '=') continue;
3172 NEXT;
3173 SKIP_BLANKS;
3174
3175 prefix = xmlParseQuotedString(ctxt);
3176 SKIP_BLANKS;
3177 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3178 garbage = 0;
3179 SKIP(2);
3180 SKIP_BLANKS;
3181
3182 if (RAW != '=') continue;
3183 NEXT;
3184 SKIP_BLANKS;
3185
3186 prefix = xmlParseQuotedString(ctxt);
3187 SKIP_BLANKS;
3188 } else if ((RAW == '?') && (NXT(1) == '>')) {
3189 garbage = 0;
3190 NEXT;
3191 } else {
3192 /*
3193 * Found garbage when parsing the namespace
3194 */
3195 if (!garbage) {
3196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData,
3198 "xmlParseNamespace found garbage\n");
3199 }
3200 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3201 ctxt->wellFormed = 0;
3202 ctxt->disableSAX = 1;
3203 NEXT;
3204 }
3205 }
3206
3207 MOVETO_ENDTAG(CUR_PTR);
3208 NEXT;
3209
3210 /*
3211 * Register the DTD.
3212 if (href != NULL)
3213 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3214 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3215 */
3216
3217 if (prefix != NULL) xmlFree(prefix);
3218 if (href != NULL) xmlFree(href);
3219#endif
3220}
3221
3222/**
3223 * xmlScanName:
3224 * @ctxt: an XML parser context
3225 *
3226 * Trickery: parse an XML name but without consuming the input flow
3227 * Needed for rollback cases. Used only when parsing entities references.
3228 *
3229 * TODO: seems deprecated now, only used in the default part of
3230 * xmlParserHandleReference
3231 *
3232 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3233 * CombiningChar | Extender
3234 *
3235 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3236 *
3237 * [6] Names ::= Name (S Name)*
3238 *
3239 * Returns the Name parsed or NULL
3240 */
3241
3242xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003243xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003244 static int deprecated = 0;
3245 if (!deprecated) {
3246 xmlGenericError(xmlGenericErrorContext,
3247 "xmlScanName() deprecated function reached\n");
3248 deprecated = 1;
3249 }
3250 return(NULL);
3251
3252#if 0
3253 xmlChar buf[XML_MAX_NAMELEN];
3254 int len = 0;
3255
3256 GROW;
3257 if (!IS_LETTER(RAW) && (RAW != '_') &&
3258 (RAW != ':')) {
3259 return(NULL);
3260 }
3261
3262
3263 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3264 (NXT(len) == '.') || (NXT(len) == '-') ||
3265 (NXT(len) == '_') || (NXT(len) == ':') ||
3266 (IS_COMBINING(NXT(len))) ||
3267 (IS_EXTENDER(NXT(len)))) {
3268 GROW;
3269 buf[len] = NXT(len);
3270 len++;
3271 if (len >= XML_MAX_NAMELEN) {
3272 xmlGenericError(xmlGenericErrorContext,
3273 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3274 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3275 (IS_DIGIT(NXT(len))) ||
3276 (NXT(len) == '.') || (NXT(len) == '-') ||
3277 (NXT(len) == '_') || (NXT(len) == ':') ||
3278 (IS_COMBINING(NXT(len))) ||
3279 (IS_EXTENDER(NXT(len))))
3280 len++;
3281 break;
3282 }
3283 }
3284 return(xmlStrndup(buf, len));
3285#endif
3286}
3287
3288/**
3289 * xmlParserHandleReference:
3290 * @ctxt: the parser context
3291 *
3292 * TODO: Remove, now deprecated ... the test is done directly in the
3293 * content parsing
3294 * routines.
3295 *
3296 * [67] Reference ::= EntityRef | CharRef
3297 *
3298 * [68] EntityRef ::= '&' Name ';'
3299 *
3300 * [ WFC: Entity Declared ]
3301 * the Name given in the entity reference must match that in an entity
3302 * declaration, except that well-formed documents need not declare any
3303 * of the following entities: amp, lt, gt, apos, quot.
3304 *
3305 * [ WFC: Parsed Entity ]
3306 * An entity reference must not contain the name of an unparsed entity
3307 *
3308 * [66] CharRef ::= '&#' [0-9]+ ';' |
3309 * '&#x' [0-9a-fA-F]+ ';'
3310 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003311 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003312 * the handling is done accordingly to
3313 * http://www.w3.org/TR/REC-xml#entproc
3314 */
3315void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003316xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003317 static int deprecated = 0;
3318 if (!deprecated) {
3319 xmlGenericError(xmlGenericErrorContext,
3320 "xmlParserHandleReference() deprecated function reached\n");
3321 deprecated = 1;
3322 }
3323
3324#if 0
3325 xmlParserInputPtr input;
3326 xmlChar *name;
3327 xmlEntityPtr ent = NULL;
3328
3329 if (ctxt->token != 0) {
3330 return;
3331 }
3332 if (RAW != '&') return;
3333 GROW;
3334 if ((RAW == '&') && (NXT(1) == '#')) {
3335 switch(ctxt->instate) {
3336 case XML_PARSER_ENTITY_DECL:
3337 case XML_PARSER_PI:
3338 case XML_PARSER_CDATA_SECTION:
3339 case XML_PARSER_COMMENT:
3340 case XML_PARSER_SYSTEM_LITERAL:
3341 /* we just ignore it there */
3342 return;
3343 case XML_PARSER_START_TAG:
3344 return;
3345 case XML_PARSER_END_TAG:
3346 return;
3347 case XML_PARSER_EOF:
3348 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3351 ctxt->wellFormed = 0;
3352 ctxt->disableSAX = 1;
3353 return;
3354 case XML_PARSER_PROLOG:
3355 case XML_PARSER_START:
3356 case XML_PARSER_MISC:
3357 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3359 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3360 ctxt->wellFormed = 0;
3361 ctxt->disableSAX = 1;
3362 return;
3363 case XML_PARSER_EPILOG:
3364 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3367 ctxt->wellFormed = 0;
3368 ctxt->disableSAX = 1;
3369 return;
3370 case XML_PARSER_DTD:
3371 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3373 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003374 "CharRef are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003375 ctxt->wellFormed = 0;
3376 ctxt->disableSAX = 1;
3377 return;
3378 case XML_PARSER_ENTITY_VALUE:
3379 /*
3380 * NOTE: in the case of entity values, we don't do the
3381 * substitution here since we need the literal
3382 * entity value to be able to save the internal
3383 * subset of the document.
3384 * This will be handled by xmlStringDecodeEntities
3385 */
3386 return;
3387 case XML_PARSER_CONTENT:
3388 return;
3389 case XML_PARSER_ATTRIBUTE_VALUE:
3390 /* ctxt->token = xmlParseCharRef(ctxt); */
3391 return;
3392 case XML_PARSER_IGNORE:
3393 return;
3394 }
3395 return;
3396 }
3397
3398 switch(ctxt->instate) {
3399 case XML_PARSER_CDATA_SECTION:
3400 return;
3401 case XML_PARSER_PI:
3402 case XML_PARSER_COMMENT:
3403 case XML_PARSER_SYSTEM_LITERAL:
3404 case XML_PARSER_CONTENT:
3405 return;
3406 case XML_PARSER_START_TAG:
3407 return;
3408 case XML_PARSER_END_TAG:
3409 return;
3410 case XML_PARSER_EOF:
3411 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3413 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3414 ctxt->wellFormed = 0;
3415 ctxt->disableSAX = 1;
3416 return;
3417 case XML_PARSER_PROLOG:
3418 case XML_PARSER_START:
3419 case XML_PARSER_MISC:
3420 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3423 ctxt->wellFormed = 0;
3424 ctxt->disableSAX = 1;
3425 return;
3426 case XML_PARSER_EPILOG:
3427 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3429 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3430 ctxt->wellFormed = 0;
3431 ctxt->disableSAX = 1;
3432 return;
3433 case XML_PARSER_ENTITY_VALUE:
3434 /*
3435 * NOTE: in the case of entity values, we don't do the
3436 * substitution here since we need the literal
3437 * entity value to be able to save the internal
3438 * subset of the document.
3439 * This will be handled by xmlStringDecodeEntities
3440 */
3441 return;
3442 case XML_PARSER_ATTRIBUTE_VALUE:
3443 /*
3444 * NOTE: in the case of attributes values, we don't do the
3445 * substitution here unless we are in a mode where
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003446 * the parser is explicitly asked to substitute
Owen Taylor3473f882001-02-23 17:55:21 +00003447 * entities. The SAX callback is called with values
3448 * without entity substitution.
3449 * This will then be handled by xmlStringDecodeEntities
3450 */
3451 return;
3452 case XML_PARSER_ENTITY_DECL:
3453 /*
3454 * we just ignore it there
3455 * the substitution will be done once the entity is referenced
3456 */
3457 return;
3458 case XML_PARSER_DTD:
3459 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3461 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003462 "Entity references are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003463 ctxt->wellFormed = 0;
3464 ctxt->disableSAX = 1;
3465 return;
3466 case XML_PARSER_IGNORE:
3467 return;
3468 }
3469
3470/* TODO: this seems not reached anymore .... Verify ... */
3471xmlGenericError(xmlGenericErrorContext,
3472 "Reached deprecated section in xmlParserHandleReference()\n");
3473xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003474 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003475xmlGenericError(xmlGenericErrorContext,
3476 "indicating the version: %s, thanks !\n", xmlParserVersion);
3477 NEXT;
3478 name = xmlScanName(ctxt);
3479 if (name == NULL) {
3480 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3482 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3483 ctxt->wellFormed = 0;
3484 ctxt->disableSAX = 1;
3485 ctxt->token = '&';
3486 return;
3487 }
3488 if (NXT(xmlStrlen(name)) != ';') {
3489 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "Entity reference: ';' expected\n");
3493 ctxt->wellFormed = 0;
3494 ctxt->disableSAX = 1;
3495 ctxt->token = '&';
3496 xmlFree(name);
3497 return;
3498 }
3499 SKIP(xmlStrlen(name) + 1);
3500 if (ctxt->sax != NULL) {
3501 if (ctxt->sax->getEntity != NULL)
3502 ent = ctxt->sax->getEntity(ctxt->userData, name);
3503 }
3504
3505 /*
3506 * [ WFC: Entity Declared ]
3507 * the Name given in the entity reference must match that in an entity
3508 * declaration, except that well-formed documents need not declare any
3509 * of the following entities: amp, lt, gt, apos, quot.
3510 */
3511 if (ent == NULL)
3512 ent = xmlGetPredefinedEntity(name);
3513 if (ent == NULL) {
3514 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3516 ctxt->sax->error(ctxt->userData,
3517 "Entity reference: entity %s not declared\n",
3518 name);
3519 ctxt->wellFormed = 0;
3520 ctxt->disableSAX = 1;
3521 xmlFree(name);
3522 return;
3523 }
3524
3525 /*
3526 * [ WFC: Parsed Entity ]
3527 * An entity reference must not contain the name of an unparsed entity
3528 */
3529 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3530 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3532 ctxt->sax->error(ctxt->userData,
3533 "Entity reference to unparsed entity %s\n", name);
3534 ctxt->wellFormed = 0;
3535 ctxt->disableSAX = 1;
3536 }
3537
3538 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3539 ctxt->token = ent->content[0];
3540 xmlFree(name);
3541 return;
3542 }
3543 input = xmlNewEntityInputStream(ctxt, ent);
3544 xmlPushInput(ctxt, input);
3545 xmlFree(name);
3546#endif
3547 return;
3548}
3549
3550/**
3551 * xmlHandleEntity:
3552 * @ctxt: an XML parser context
3553 * @entity: an XML entity pointer.
3554 *
3555 * Default handling of defined entities, when should we define a new input
3556 * stream ? When do we just handle that as a set of chars ?
3557 *
3558 * OBSOLETE: to be removed at some point.
3559 */
3560
3561void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003562xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 static int deprecated = 0;
3564 if (!deprecated) {
3565 xmlGenericError(xmlGenericErrorContext,
3566 "xmlHandleEntity() deprecated function reached\n");
3567 deprecated = 1;
3568 }
3569
3570#if 0
3571 int len;
3572 xmlParserInputPtr input;
3573
3574 if (entity->content == NULL) {
3575 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3578 entity->name);
3579 ctxt->wellFormed = 0;
3580 ctxt->disableSAX = 1;
3581 return;
3582 }
3583 len = xmlStrlen(entity->content);
3584 if (len <= 2) goto handle_as_char;
3585
3586 /*
3587 * Redefine its content as an input stream.
3588 */
3589 input = xmlNewEntityInputStream(ctxt, entity);
3590 xmlPushInput(ctxt, input);
3591 return;
3592
3593handle_as_char:
3594 /*
3595 * Just handle the content as a set of chars.
3596 */
3597 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3598 (ctxt->sax->characters != NULL))
3599 ctxt->sax->characters(ctxt->userData, entity->content, len);
3600#endif
3601}
3602
3603/**
3604 * xmlNewGlobalNs:
3605 * @doc: the document carrying the namespace
3606 * @href: the URI associated
3607 * @prefix: the prefix for the namespace
3608 *
3609 * Creation of a Namespace, the old way using PI and without scoping
3610 * DEPRECATED !!!
3611 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003612 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003613 */
3614xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003615xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3616 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003617 static int deprecated = 0;
3618 if (!deprecated) {
3619 xmlGenericError(xmlGenericErrorContext,
3620 "xmlNewGlobalNs() deprecated function reached\n");
3621 deprecated = 1;
3622 }
3623 return(NULL);
3624#if 0
3625 xmlNodePtr root;
3626
3627 xmlNsPtr cur;
3628
3629 root = xmlDocGetRootElement(doc);
3630 if (root != NULL)
3631 return(xmlNewNs(root, href, prefix));
3632
3633 /*
3634 * if there is no root element yet, create an old Namespace type
3635 * and it will be moved to the root at save time.
3636 */
3637 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3638 if (cur == NULL) {
3639 xmlGenericError(xmlGenericErrorContext,
3640 "xmlNewGlobalNs : malloc failed\n");
3641 return(NULL);
3642 }
3643 memset(cur, 0, sizeof(xmlNs));
3644 cur->type = XML_GLOBAL_NAMESPACE;
3645
3646 if (href != NULL)
3647 cur->href = xmlStrdup(href);
3648 if (prefix != NULL)
3649 cur->prefix = xmlStrdup(prefix);
3650
3651 /*
3652 * Add it at the end to preserve parsing order ...
3653 */
3654 if (doc != NULL) {
3655 if (doc->oldNs == NULL) {
3656 doc->oldNs = cur;
3657 } else {
3658 xmlNsPtr prev = doc->oldNs;
3659
3660 while (prev->next != NULL) prev = prev->next;
3661 prev->next = cur;
3662 }
3663 }
3664
3665 return(NULL);
3666#endif
3667}
3668
3669/**
3670 * xmlUpgradeOldNs:
3671 * @doc: a document pointer
3672 *
3673 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3674 * DEPRECATED
3675 */
3676void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003677xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003678 static int deprecated = 0;
3679 if (!deprecated) {
3680 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003681 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003682 deprecated = 1;
3683 }
3684#if 0
3685 xmlNsPtr cur;
3686
3687 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3688 if (doc->children == NULL) {
3689#ifdef DEBUG_TREE
3690 xmlGenericError(xmlGenericErrorContext,
3691 "xmlUpgradeOldNs: failed no root !\n");
3692#endif
3693 return;
3694 }
3695
3696 cur = doc->oldNs;
3697 while (cur->next != NULL) {
3698 cur->type = XML_LOCAL_NAMESPACE;
3699 cur = cur->next;
3700 }
3701 cur->type = XML_LOCAL_NAMESPACE;
3702 cur->next = doc->children->nsDef;
3703 doc->children->nsDef = doc->oldNs;
3704 doc->oldNs = NULL;
3705#endif
3706}
3707