blob: c9ea6ed8175ad6f4bd53e4322c4892b9493a5b0d [file] [log] [blame]
/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
/*
 * Various global defaults for parsing
 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Names of the features advertised by xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    /* parser state and options */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the capacity of the @result array (input), updated on output
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * If @len or @result is NULL nothing is copied and only the total
 * is returned. If *@len is larger than the number of features it is
 * lowered to that number; otherwise it is left unchanged.
 * The strings stored in @result point into a static table and must not
 * be deallocated.
 *
 * Returns -1 in case of error (*@len negative or >= 1000), otherwise
 *         the total number of features.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);
    if (wanted > total) {
        wanted = total;
        *len = total;
    }
    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];
    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through and fail the lower bound here */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
399
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is one of those allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
414
/*
 * Direct lookup for the code points 0x0000 - 0x00FF: non-zero when the
 * code point belongs to production [85] BaseChar.
 */
static const int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive {first, last} ranges of production [85] BaseChar for the
 * code points 0x0100 and above. The binary search in xmlIsBaseChar()
 * requires the entries to stay sorted and non-overlapping.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69},
    {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
    {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
    {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
    {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
    {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
    {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
    {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
    {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
    {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
    {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
    {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
    {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
    {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
    {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
    {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
    {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3},
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, the others
 * by a binary search in xmlBaseCharRanges. Negative values are never
 * base characters; they are rejected up front so that they cannot be
 * used as an index into xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) / sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
653
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive {first, last} runs of the non-ASCII digits */
    static const int digitRuns[][2] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
        {0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF},
        {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
        {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29},
    };
    unsigned int run;

    /* ASCII '0' - '9' */
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    /* nothing else below the first non-ASCII run */
    if (c < 0x0660)
        return(0);

    for (run = 0; run < sizeof(digitRuns) / sizeof(digitRuns[0]); run++) {
        if ((c >= digitRuns[run][0]) && (c <= digitRuns[run][1]))
            return(1);
    }
    return(0);
}
683
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive {first, last} runs, single code points as {x, x} */
    static const int combiningRuns[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C},
        {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
        {0x0981, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09BE},
        {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E},
        {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42}, {0x0A47, 0x0A48},
        {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
        {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C},
        {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
        {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
        {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48},
        {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C82, 0x0C83},
        {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
        {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
        {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
        {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
        {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
        {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B},
        {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC},
        {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
        {0x309A, 0x309A},
    };
    unsigned int run;

    /* no combining character lives below U+0300 */
    if (c < 0x0300)
        return(0);

    for (run = 0; run < sizeof(combiningRuns) / sizeof(combiningRuns[0]);
         run++) {
        if ((c >= combiningRuns[run][0]) && (c <= combiningRuns[run][1]))
            return(1);
    }
    return(0);
}
796
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* contiguous runs of the production */
    if (((c >= 0x02D0) && (c <= 0x02D1)) ||
        ((c >= 0x3031) && (c <= 0x3035)) ||
        ((c >= 0x309D) && (c <= 0x309E)) ||
        ((c >= 0x30FC) && (c <= 0x30FE)))
        return 1;

    /* isolated code points of the production */
    if ((c == 0x00B7) || (c == 0x0387) || (c == 0x0640) ||
        (c == 0x0E46) || (c == 0x0EC6) || (c == 0x3005))
        return 1;

    return 0;
}
821
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three sets listed by the production are accepted; the
 * CJK compatibility range #xF900-#xFA2D is not part of [86] and is
 * therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick exit: nothing below #x0100 is ideographic */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
839
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  a Unicode code point (int)
 *
 * Tell whether the code point matches the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* white space and the punctuation set of the production */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
888/* we need to keep enough input to show errors in context */
889#define LINE_LEN 80
890
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, compiled only when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the input's base pointer no
 * longer matches the buffer content, or when the current position is
 * outside [base, base + use], then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: squeezing them through an (int)
     * cast truncates them wherever pointers are wider than int.
     * The offset and the counters are cast to the exact type the
     * conversion specifier expects.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
915
916
917/**
918 * xmlParserInputRead:
919 * @in: an XML parser input
920 * @len: an indicative size for the lookahead
921 *
922 * This function refresh the input for the parser. It doesn't try to
923 * preserve pointers to the input buffer, and discard already read data
924 *
925 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
926 * end of this entity
927 */
928int
929xmlParserInputRead(xmlParserInputPtr in, int len) {
930 int ret;
931 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000932 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000933
934#ifdef DEBUG_INPUT
935 xmlGenericError(xmlGenericErrorContext, "Read\n");
936#endif
937 if (in->buf == NULL) return(-1);
938 if (in->base == NULL) return(-1);
939 if (in->cur == NULL) return(-1);
940 if (in->buf->buffer == NULL) return(-1);
941 if (in->buf->readcallback == NULL) return(-1);
942
943 CHECK_BUFFER(in);
944
945 used = in->cur - in->buf->buffer->content;
946 ret = xmlBufferShrink(in->buf->buffer, used);
947 if (ret > 0) {
948 in->cur -= ret;
949 in->consumed += ret;
950 }
951 ret = xmlParserInputBufferRead(in->buf, len);
952 if (in->base != in->buf->buffer->content) {
953 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000954 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000955 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000956 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000957 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000959 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000960 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000961
962 CHECK_BUFFER(in);
963
964 return(ret);
965}
966
967/**
968 * xmlParserInputGrow:
969 * @in: an XML parser input
970 * @len: an indicative size for the lookahead
971 *
972 * This function increase the input for the parser. It tries to
973 * preserve pointers to the input buffer, and keep already read data
974 *
975 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
976 * end of this entity
977 */
978int
979xmlParserInputGrow(xmlParserInputPtr in, int len) {
980 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000981 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000982
983#ifdef DEBUG_INPUT
984 xmlGenericError(xmlGenericErrorContext, "Grow\n");
985#endif
986 if (in->buf == NULL) return(-1);
987 if (in->base == NULL) return(-1);
988 if (in->cur == NULL) return(-1);
989 if (in->buf->buffer == NULL) return(-1);
990
991 CHECK_BUFFER(in);
992
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000993 indx = in->cur - in->base;
994 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000995
996 CHECK_BUFFER(in);
997
998 return(0);
999 }
1000 if (in->buf->readcallback != NULL)
1001 ret = xmlParserInputBufferGrow(in->buf, len);
1002 else
1003 return(0);
1004
1005 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001006 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001007 * block, but we use it really as an integer to do some
1008 * pointer arithmetic. Insure will raise it as a bug but in
1009 * that specific case, that's not !
1010 */
1011 if (in->base != in->buf->buffer->content) {
1012 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001013 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001014 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001015 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001016 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001018 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001019 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001020
1021 CHECK_BUFFER(in);
1022
1023 return(ret);
1024}
1025
1026/**
1027 * xmlParserInputShrink:
1028 * @in: an XML parser input
1029 *
1030 * This function removes used input for the parser.
1031 */
1032void
1033xmlParserInputShrink(xmlParserInputPtr in) {
1034 int used;
1035 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001036 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001037
1038#ifdef DEBUG_INPUT
1039 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1040#endif
1041 if (in->buf == NULL) return;
1042 if (in->base == NULL) return;
1043 if (in->cur == NULL) return;
1044 if (in->buf->buffer == NULL) return;
1045
1046 CHECK_BUFFER(in);
1047
1048 used = in->cur - in->buf->buffer->content;
1049 /*
1050 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001051 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001052 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001053 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001054 return;
1055 if (used > INPUT_CHUNK) {
1056 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1057 if (ret > 0) {
1058 in->cur -= ret;
1059 in->consumed += ret;
1060 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001061 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001062 }
1063
1064 CHECK_BUFFER(in);
1065
1066 if (in->buf->buffer->use > INPUT_CHUNK) {
1067 return;
1068 }
1069 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1070 if (in->base != in->buf->buffer->content) {
1071 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001072 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001073 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001074 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001075 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001076 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001077 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001078 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001079
1080 CHECK_BUFFER(in);
1081}
1082
1083/************************************************************************
1084 * *
1085 * UTF8 character input and related functions *
1086 * *
1087 ************************************************************************/
1088
1089/**
1090 * xmlNextChar:
1091 * @ctxt: the XML parser context
1092 *
1093 * Skip to the next char input char.
1094 */
1095
1096void
1097xmlNextChar(xmlParserCtxtPtr ctxt) {
1098 if (ctxt->instate == XML_PARSER_EOF)
1099 return;
1100
1101 /*
1102 * 2.11 End-of-Line Handling
1103 * the literal two-character sequence "#xD#xA" or a standalone
1104 * literal #xD, an XML processor must pass to the application
1105 * the single character #xA.
1106 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001107 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001108 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001109 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1110 (ctxt->instate != XML_PARSER_COMMENT)) {
1111 /*
1112 * If we are at the end of the current entity and
1113 * the context allows it, we pop consumed entities
1114 * automatically.
1115 * the auto closing should be blocked in other cases
1116 */
1117 xmlPopInput(ctxt);
1118 } else {
1119 if (*(ctxt->input->cur) == '\n') {
1120 ctxt->input->line++; ctxt->input->col = 1;
1121 } else ctxt->input->col++;
1122 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1123 /*
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1126 *
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131 *
1132 * Check for the 0x110000 limit too
1133 */
1134 const unsigned char *cur = ctxt->input->cur;
1135 unsigned char c;
1136
1137 c = *cur;
1138 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001139 if (cur[1] == 0)
1140 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1141 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001142 goto encoding_error;
1143 if ((c & 0xe0) == 0xe0) {
1144 unsigned int val;
1145
Daniel Veillard561b7f82002-03-20 21:55:57 +00001146 if (cur[2] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001149 goto encoding_error;
1150 if ((c & 0xf0) == 0xf0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001151 if (cur[3] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001154 ((cur[3] & 0xc0) != 0x80))
1155 goto encoding_error;
1156 /* 4-byte code */
1157 ctxt->input->cur += 4;
1158 val = (cur[0] & 0x7) << 18;
1159 val |= (cur[1] & 0x3f) << 12;
1160 val |= (cur[2] & 0x3f) << 6;
1161 val |= cur[3] & 0x3f;
1162 } else {
1163 /* 3-byte code */
1164 ctxt->input->cur += 3;
1165 val = (cur[0] & 0xf) << 12;
1166 val |= (cur[1] & 0x3f) << 6;
1167 val |= cur[2] & 0x3f;
1168 }
1169 if (((val > 0xd7ff) && (val < 0xe000)) ||
1170 ((val > 0xfffd) && (val < 0x10000)) ||
1171 (val >= 0x110000)) {
1172 if ((ctxt->sax != NULL) &&
1173 (ctxt->sax->error != NULL))
1174 ctxt->sax->error(ctxt->userData,
1175 "Char 0x%X out of allowed range\n", val);
1176 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1177 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001178 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001179 }
1180 } else
1181 /* 2-byte code */
1182 ctxt->input->cur += 2;
1183 } else
1184 /* 1-byte code */
1185 ctxt->input->cur++;
1186 } else {
1187 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001188 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001189 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001190 * XML constructs only use < 128 chars
1191 */
1192 ctxt->input->cur++;
1193 }
1194 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001197 }
1198 } else {
1199 ctxt->input->cur++;
1200 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001201 if (*ctxt->input->cur == 0)
Owen Taylor3473f882001-02-23 17:55:21 +00001202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001204 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Owen Taylor3473f882001-02-23 17:55:21 +00001205 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001206 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1208 xmlPopInput(ctxt);
1209 return;
1210encoding_error:
1211 /*
1212 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001213 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001214 * declaration header. Report the error and switch the encoding
1215 * to ISO-Latin-1 (if you don't like this policy, just declare the
1216 * encoding !)
1217 */
1218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1219 ctxt->sax->error(ctxt->userData,
1220 "Input is not proper UTF-8, indicate encoding !\n");
1221 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001222 ctxt->input->cur[0], ctxt->input->cur[1],
1223 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001224 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001225 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001226 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1227
1228 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001229 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001230 return;
1231}
1232
1233/**
1234 * xmlCurrentChar:
1235 * @ctxt: the XML parser context
1236 * @len: pointer to the length of the char read
1237 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001238 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001239 * bytes in the input buffer. Implement the end of line normalization:
1240 * 2.11 End-of-Line Handling
1241 * Wherever an external parsed entity or the literal entity value
1242 * of an internal parsed entity contains either the literal two-character
1243 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1244 * must pass to the application the single character #xA.
1245 * This behavior can conveniently be produced by normalizing all
1246 * line breaks to #xA on input, before parsing.)
1247 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001248 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001249 */
1250
1251int
1252xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1253 if (ctxt->instate == XML_PARSER_EOF)
1254 return(0);
1255
Daniel Veillard561b7f82002-03-20 21:55:57 +00001256 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1257 *len = 1;
1258 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1261 /*
1262 * We are supposed to handle UTF8, check it's valid
1263 * From rfc2044: encoding of the Unicode values on UTF-8:
1264 *
1265 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1266 * 0000 0000-0000 007F 0xxxxxxx
1267 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1268 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1269 *
1270 * Check for the 0x110000 limit too
1271 */
1272 const unsigned char *cur = ctxt->input->cur;
1273 unsigned char c;
1274 unsigned int val;
1275
1276 c = *cur;
1277 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001278 if (cur[1] == 0)
1279 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1280 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001281 goto encoding_error;
1282 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001283
1284 if (cur[2] == 0)
1285 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1286 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001287 goto encoding_error;
1288 if ((c & 0xf0) == 0xf0) {
1289 if (cur[3] == 0)
1290 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001291 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001292 ((cur[3] & 0xc0) != 0x80))
1293 goto encoding_error;
1294 /* 4-byte code */
1295 *len = 4;
1296 val = (cur[0] & 0x7) << 18;
1297 val |= (cur[1] & 0x3f) << 12;
1298 val |= (cur[2] & 0x3f) << 6;
1299 val |= cur[3] & 0x3f;
1300 } else {
1301 /* 3-byte code */
1302 *len = 3;
1303 val = (cur[0] & 0xf) << 12;
1304 val |= (cur[1] & 0x3f) << 6;
1305 val |= cur[2] & 0x3f;
1306 }
1307 } else {
1308 /* 2-byte code */
1309 *len = 2;
1310 val = (cur[0] & 0x1f) << 6;
1311 val |= cur[1] & 0x3f;
1312 }
1313 if (!IS_CHAR(val)) {
1314 if ((ctxt->sax != NULL) &&
1315 (ctxt->sax->error != NULL))
1316 ctxt->sax->error(ctxt->userData,
1317 "Char 0x%X out of allowed range\n", val);
1318 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1319 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001320 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001321 }
1322 return(val);
1323 } else {
1324 /* 1-byte code */
1325 *len = 1;
1326 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001327 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001328 ctxt->nbChars++;
1329 ctxt->input->cur++;
1330 }
1331 return(0xA);
1332 }
1333 return((int) *ctxt->input->cur);
1334 }
1335 }
1336 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001337 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001338 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001339 * XML constructs only use < 128 chars
1340 */
1341 *len = 1;
1342 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001343 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001344 ctxt->nbChars++;
1345 ctxt->input->cur++;
1346 }
1347 return(0xA);
1348 }
1349 return((int) *ctxt->input->cur);
1350encoding_error:
1351 /*
1352 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001353 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001354 * declaration header. Report the error and switch the encoding
1355 * to ISO-Latin-1 (if you don't like this policy, just declare the
1356 * encoding !)
1357 */
1358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1359 ctxt->sax->error(ctxt->userData,
1360 "Input is not proper UTF-8, indicate encoding !\n");
1361 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001362 ctxt->input->cur[0], ctxt->input->cur[1],
1363 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001364 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001365 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001366 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1367
1368 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1369 *len = 1;
1370 return((int) *ctxt->input->cur);
1371}
1372
1373/**
1374 * xmlStringCurrentChar:
1375 * @ctxt: the XML parser context
1376 * @cur: pointer to the beginning of the char
1377 * @len: pointer to the length of the char read
1378 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001379 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001380 * bytes in the input buffer.
1381 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001382 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001383 */
1384
1385int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001386xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1387{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001388 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001389 /*
1390 * We are supposed to handle UTF8, check it's valid
1391 * From rfc2044: encoding of the Unicode values on UTF-8:
1392 *
1393 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1394 * 0000 0000-0000 007F 0xxxxxxx
1395 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1396 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1397 *
1398 * Check for the 0x110000 limit too
1399 */
1400 unsigned char c;
1401 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001402
Daniel Veillardd8224e02002-01-13 15:43:22 +00001403 c = *cur;
1404 if (c & 0x80) {
1405 if ((cur[1] & 0xc0) != 0x80)
1406 goto encoding_error;
1407 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001408
Daniel Veillardd8224e02002-01-13 15:43:22 +00001409 if ((cur[2] & 0xc0) != 0x80)
1410 goto encoding_error;
1411 if ((c & 0xf0) == 0xf0) {
1412 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1413 goto encoding_error;
1414 /* 4-byte code */
1415 *len = 4;
1416 val = (cur[0] & 0x7) << 18;
1417 val |= (cur[1] & 0x3f) << 12;
1418 val |= (cur[2] & 0x3f) << 6;
1419 val |= cur[3] & 0x3f;
1420 } else {
1421 /* 3-byte code */
1422 *len = 3;
1423 val = (cur[0] & 0xf) << 12;
1424 val |= (cur[1] & 0x3f) << 6;
1425 val |= cur[2] & 0x3f;
1426 }
1427 } else {
1428 /* 2-byte code */
1429 *len = 2;
1430 val = (cur[0] & 0x1f) << 6;
1431 val |= cur[1] & 0x3f;
1432 }
1433 if (!IS_CHAR(val)) {
1434 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1435 (ctxt->sax->error != NULL))
1436 ctxt->sax->error(ctxt->userData,
1437 "Char 0x%X out of allowed range\n",
1438 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001439 if (ctxt != NULL) {
1440 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1441 ctxt->wellFormed = 0;
1442 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1443 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001444 }
1445 return (val);
1446 } else {
1447 /* 1-byte code */
1448 *len = 1;
1449 return ((int) *cur);
1450 }
Owen Taylor3473f882001-02-23 17:55:21 +00001451 }
1452 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001453 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001454 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001455 * XML constructs only use < 128 chars
1456 */
1457 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001458 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001459encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001460
Owen Taylor3473f882001-02-23 17:55:21 +00001461 /*
1462 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001463 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001464 * declaration header. Report the error and switch the encoding
1465 * to ISO-Latin-1 (if you don't like this policy, just declare the
1466 * encoding !)
1467 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001468 if (ctxt != NULL) {
1469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1470 ctxt->sax->error(ctxt->userData,
1471 "Input is not proper UTF-8, indicate encoding !\n");
1472 ctxt->sax->error(ctxt->userData,
1473 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1474 ctxt->input->cur[0], ctxt->input->cur[1],
1475 ctxt->input->cur[2], ctxt->input->cur[3]);
1476 }
1477 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001478 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001479 }
Owen Taylor3473f882001-02-23 17:55:21 +00001480
1481 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001482 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001483}
1484
1485/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001486 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001487 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001488 * @val: the char value
1489 *
1490 * append the char value in the array
1491 *
1492 * Returns the number of xmlChar written
1493 */
Owen Taylor3473f882001-02-23 17:55:21 +00001494int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001496 /*
1497 * We are supposed to handle UTF8, check it's valid
1498 * From rfc2044: encoding of the Unicode values on UTF-8:
1499 *
1500 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1501 * 0000 0000-0000 007F 0xxxxxxx
1502 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1503 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1504 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001505 if (val >= 0x80) {
1506 xmlChar *savedout = out;
1507 int bits;
1508 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1509 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1510 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1511 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001512 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001513 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001514 val);
1515 return(0);
1516 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001517 for ( ; bits >= 0; bits-= 6)
1518 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1519 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 }
1521 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001522 return 1;
1523}
1524
1525/**
1526 * xmlCopyChar:
1527 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001528 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001529 * @val: the char value
1530 *
1531 * append the char value in the array
1532 *
1533 * Returns the number of xmlChar written
1534 */
1535
1536int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001537xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538 /* the len parameter is ignored */
1539 if (val >= 0x80) {
1540 return(xmlCopyCharMultiByte (out, val));
1541 }
1542 *out = (xmlChar) val;
1543 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001544}
1545
1546/************************************************************************
1547 * *
1548 * Commodity functions to switch encodings *
1549 * *
1550 ************************************************************************/
1551
1552/**
1553 * xmlSwitchEncoding:
1554 * @ctxt: the parser context
1555 * @enc: the encoding value (number)
1556 *
1557 * change the input functions when discovering the character encoding
1558 * of a given entity.
1559 *
1560 * Returns 0 in case of success, -1 otherwise
1561 */
1562int
1563xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1564{
1565 xmlCharEncodingHandlerPtr handler;
1566
1567 switch (enc) {
1568 case XML_CHAR_ENCODING_ERROR:
1569 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1572 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001573 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001574 break;
1575 case XML_CHAR_ENCODING_NONE:
1576 /* let's assume it's UTF-8 without the XML decl */
1577 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1578 return(0);
1579 case XML_CHAR_ENCODING_UTF8:
1580 /* default encoding, no conversion should be needed */
1581 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001582
1583 /*
1584 * Errata on XML-1.0 June 20 2001
1585 * Specific handling of the Byte Order Mark for
1586 * UTF-8
1587 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001588 if ((ctxt->input != NULL) &&
1589 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001590 (ctxt->input->cur[1] == 0xBB) &&
1591 (ctxt->input->cur[2] == 0xBF)) {
1592 ctxt->input->cur += 3;
1593 }
Owen Taylor3473f882001-02-23 17:55:21 +00001594 return(0);
1595 default:
1596 break;
1597 }
1598 handler = xmlGetCharEncodingHandler(enc);
1599 if (handler == NULL) {
1600 /*
1601 * Default handlers.
1602 */
1603 switch (enc) {
1604 case XML_CHAR_ENCODING_ERROR:
1605 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1608 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001609 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1611 break;
1612 case XML_CHAR_ENCODING_NONE:
1613 /* let's assume it's UTF-8 without the XML decl */
1614 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1615 return(0);
1616 case XML_CHAR_ENCODING_UTF8:
1617 case XML_CHAR_ENCODING_ASCII:
1618 /* default encoding, no conversion should be needed */
1619 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1620 return(0);
1621 case XML_CHAR_ENCODING_UTF16LE:
1622 break;
1623 case XML_CHAR_ENCODING_UTF16BE:
1624 break;
1625 case XML_CHAR_ENCODING_UCS4LE:
1626 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1628 ctxt->sax->error(ctxt->userData,
1629 "char encoding USC4 little endian not supported\n");
1630 break;
1631 case XML_CHAR_ENCODING_UCS4BE:
1632 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1634 ctxt->sax->error(ctxt->userData,
1635 "char encoding USC4 big endian not supported\n");
1636 break;
1637 case XML_CHAR_ENCODING_EBCDIC:
1638 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1640 ctxt->sax->error(ctxt->userData,
1641 "char encoding EBCDIC not supported\n");
1642 break;
1643 case XML_CHAR_ENCODING_UCS4_2143:
1644 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1646 ctxt->sax->error(ctxt->userData,
1647 "char encoding UCS4 2143 not supported\n");
1648 break;
1649 case XML_CHAR_ENCODING_UCS4_3412:
1650 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1652 ctxt->sax->error(ctxt->userData,
1653 "char encoding UCS4 3412 not supported\n");
1654 break;
1655 case XML_CHAR_ENCODING_UCS2:
1656 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1658 ctxt->sax->error(ctxt->userData,
1659 "char encoding UCS2 not supported\n");
1660 break;
1661 case XML_CHAR_ENCODING_8859_1:
1662 case XML_CHAR_ENCODING_8859_2:
1663 case XML_CHAR_ENCODING_8859_3:
1664 case XML_CHAR_ENCODING_8859_4:
1665 case XML_CHAR_ENCODING_8859_5:
1666 case XML_CHAR_ENCODING_8859_6:
1667 case XML_CHAR_ENCODING_8859_7:
1668 case XML_CHAR_ENCODING_8859_8:
1669 case XML_CHAR_ENCODING_8859_9:
1670 /*
1671 * We used to keep the internal content in the
1672 * document encoding however this turns being unmaintainable
1673 * So xmlGetCharEncodingHandler() will return non-null
1674 * values for this now.
1675 */
1676 if ((ctxt->inputNr == 1) &&
1677 (ctxt->encoding == NULL) &&
1678 (ctxt->input->encoding != NULL)) {
1679 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1680 }
1681 ctxt->charset = enc;
1682 return(0);
1683 case XML_CHAR_ENCODING_2022_JP:
1684 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1686 ctxt->sax->error(ctxt->userData,
1687 "char encoding ISO-2022-JPnot supported\n");
1688 break;
1689 case XML_CHAR_ENCODING_SHIFT_JIS:
1690 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1692 ctxt->sax->error(ctxt->userData,
1693 "char encoding Shift_JIS not supported\n");
1694 break;
1695 case XML_CHAR_ENCODING_EUC_JP:
1696 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698 ctxt->sax->error(ctxt->userData,
1699 "char encoding EUC-JPnot supported\n");
1700 break;
1701 }
1702 }
1703 if (handler == NULL)
1704 return(-1);
1705 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1706 return(xmlSwitchToEncoding(ctxt, handler));
1707}
1708
1709/**
1710 * xmlSwitchToEncoding:
1711 * @ctxt: the parser context
1712 * @handler: the encoding handler
1713 *
1714 * change the input functions when discovering the character encoding
1715 * of a given entity.
1716 *
1717 * Returns 0 in case of success, -1 otherwise
1718 */
1719int
1720xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1721{
1722 int nbchars;
1723
1724 if (handler != NULL) {
1725 if (ctxt->input != NULL) {
1726 if (ctxt->input->buf != NULL) {
1727 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001728 /*
1729 * Check in case the auto encoding detetection triggered
1730 * in already.
1731 */
Owen Taylor3473f882001-02-23 17:55:21 +00001732 if (ctxt->input->buf->encoder == handler)
1733 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001734
1735 /*
1736 * "UTF-16" can be used for both LE and BE
1737 */
1738 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1739 BAD_CAST "UTF-16", 6)) &&
1740 (!xmlStrncmp(BAD_CAST handler->name,
1741 BAD_CAST "UTF-16", 6))) {
1742 return(0);
1743 }
1744
Owen Taylor3473f882001-02-23 17:55:21 +00001745 /*
1746 * Note: this is a bit dangerous, but that's what it
1747 * takes to use nearly compatible signature for different
1748 * encodings.
1749 */
1750 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1751 ctxt->input->buf->encoder = handler;
1752 return(0);
1753 }
1754 ctxt->input->buf->encoder = handler;
1755
1756 /*
1757 * Is there already some content down the pipe to convert ?
1758 */
1759 if ((ctxt->input->buf->buffer != NULL) &&
1760 (ctxt->input->buf->buffer->use > 0)) {
1761 int processed;
1762
1763 /*
1764 * Specific handling of the Byte Order Mark for
1765 * UTF-16
1766 */
1767 if ((handler->name != NULL) &&
1768 (!strcmp(handler->name, "UTF-16LE")) &&
1769 (ctxt->input->cur[0] == 0xFF) &&
1770 (ctxt->input->cur[1] == 0xFE)) {
1771 ctxt->input->cur += 2;
1772 }
1773 if ((handler->name != NULL) &&
1774 (!strcmp(handler->name, "UTF-16BE")) &&
1775 (ctxt->input->cur[0] == 0xFE) &&
1776 (ctxt->input->cur[1] == 0xFF)) {
1777 ctxt->input->cur += 2;
1778 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001779 /*
1780 * Errata on XML-1.0 June 20 2001
1781 * Specific handling of the Byte Order Mark for
1782 * UTF-8
1783 */
1784 if ((handler->name != NULL) &&
1785 (!strcmp(handler->name, "UTF-8")) &&
1786 (ctxt->input->cur[0] == 0xEF) &&
1787 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001788 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001789 ctxt->input->cur += 3;
1790 }
Owen Taylor3473f882001-02-23 17:55:21 +00001791
1792 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001793 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001794 * Move it as the raw buffer and create a new input buffer
1795 */
1796 processed = ctxt->input->cur - ctxt->input->base;
1797 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1798 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1799 ctxt->input->buf->buffer = xmlBufferCreate();
1800
1801 if (ctxt->html) {
1802 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001803 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001804 */
1805 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1806 ctxt->input->buf->buffer,
1807 ctxt->input->buf->raw);
1808 } else {
1809 /*
1810 * convert just enough to get
1811 * '<?xml version="1.0" encoding="xxx"?>'
1812 * parsed with the autodetected encoding
1813 * into the parser reading buffer.
1814 */
1815 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1816 ctxt->input->buf->buffer,
1817 ctxt->input->buf->raw);
1818 }
1819 if (nbchars < 0) {
1820 xmlGenericError(xmlGenericErrorContext,
1821 "xmlSwitchToEncoding: encoder error\n");
1822 return(-1);
1823 }
1824 ctxt->input->base =
1825 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001826 ctxt->input->end =
1827 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001828
1829 }
1830 return(0);
1831 } else {
1832 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1833 /*
1834 * When parsing a static memory array one must know the
1835 * size to be able to convert the buffer.
1836 */
1837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1838 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001839 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001840 return(-1);
1841 } else {
1842 int processed;
1843
1844 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001845 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001846 * Move it as the raw buffer and create a new input buffer
1847 */
1848 processed = ctxt->input->cur - ctxt->input->base;
1849
1850 ctxt->input->buf->raw = xmlBufferCreate();
1851 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1852 ctxt->input->length - processed);
1853 ctxt->input->buf->buffer = xmlBufferCreate();
1854
1855 /*
1856 * convert as much as possible of the raw input
1857 * to the parser reading buffer.
1858 */
1859 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1860 ctxt->input->buf->buffer,
1861 ctxt->input->buf->raw);
1862 if (nbchars < 0) {
1863 xmlGenericError(xmlGenericErrorContext,
1864 "xmlSwitchToEncoding: encoder error\n");
1865 return(-1);
1866 }
1867
1868 /*
1869 * Conversion succeeded, get rid of the old buffer
1870 */
1871 if ((ctxt->input->free != NULL) &&
1872 (ctxt->input->base != NULL))
1873 ctxt->input->free((xmlChar *) ctxt->input->base);
1874 ctxt->input->base =
1875 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001876 ctxt->input->end =
1877 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001878 }
1879 }
1880 } else {
1881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1882 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001883 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(-1);
1885 }
1886 /*
1887 * The parsing is now done in UTF8 natively
1888 */
1889 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1890 } else
1891 return(-1);
1892 return(0);
1893
1894}
1895
1896/************************************************************************
1897 * *
1898 * Commodity functions to handle entities processing *
1899 * *
1900 ************************************************************************/
1901
1902/**
1903 * xmlFreeInputStream:
1904 * @input: an xmlParserInputPtr
1905 *
1906 * Free up an input stream.
1907 */
1908void
1909xmlFreeInputStream(xmlParserInputPtr input) {
1910 if (input == NULL) return;
1911
1912 if (input->filename != NULL) xmlFree((char *) input->filename);
1913 if (input->directory != NULL) xmlFree((char *) input->directory);
1914 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1915 if (input->version != NULL) xmlFree((char *) input->version);
1916 if ((input->free != NULL) && (input->base != NULL))
1917 input->free((xmlChar *) input->base);
1918 if (input->buf != NULL)
1919 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001920 xmlFree(input);
1921}
1922
1923/**
1924 * xmlNewInputStream:
1925 * @ctxt: an XML parser context
1926 *
1927 * Create a new input stream structure
1928 * Returns the new input stream or NULL
1929 */
1930xmlParserInputPtr
1931xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1932 xmlParserInputPtr input;
1933
1934 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1935 if (input == NULL) {
1936 if (ctxt != NULL) {
1937 ctxt->errNo = XML_ERR_NO_MEMORY;
1938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1939 ctxt->sax->error(ctxt->userData,
1940 "malloc: couldn't allocate a new input stream\n");
1941 ctxt->errNo = XML_ERR_NO_MEMORY;
1942 }
1943 return(NULL);
1944 }
1945 memset(input, 0, sizeof(xmlParserInput));
1946 input->line = 1;
1947 input->col = 1;
1948 input->standalone = -1;
1949 return(input);
1950}
1951
1952/**
1953 * xmlNewIOInputStream:
1954 * @ctxt: an XML parser context
1955 * @input: an I/O Input
1956 * @enc: the charset encoding if known
1957 *
1958 * Create a new input stream structure encapsulating the @input into
1959 * a stream suitable for the parser.
1960 *
1961 * Returns the new input stream or NULL
1962 */
1963xmlParserInputPtr
1964xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1965 xmlCharEncoding enc) {
1966 xmlParserInputPtr inputStream;
1967
1968 if (xmlParserDebugEntities)
1969 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1970 inputStream = xmlNewInputStream(ctxt);
1971 if (inputStream == NULL) {
1972 return(NULL);
1973 }
1974 inputStream->filename = NULL;
1975 inputStream->buf = input;
1976 inputStream->base = inputStream->buf->buffer->content;
1977 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001978 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001979 if (enc != XML_CHAR_ENCODING_NONE) {
1980 xmlSwitchEncoding(ctxt, enc);
1981 }
1982
1983 return(inputStream);
1984}
1985
1986/**
1987 * xmlNewEntityInputStream:
1988 * @ctxt: an XML parser context
1989 * @entity: an Entity pointer
1990 *
1991 * Create a new input stream based on an xmlEntityPtr
1992 *
1993 * Returns the new input stream or NULL
1994 */
1995xmlParserInputPtr
1996xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1997 xmlParserInputPtr input;
1998
1999 if (entity == NULL) {
2000 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2002 ctxt->sax->error(ctxt->userData,
2003 "internal: xmlNewEntityInputStream entity = NULL\n");
2004 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2005 return(NULL);
2006 }
2007 if (xmlParserDebugEntities)
2008 xmlGenericError(xmlGenericErrorContext,
2009 "new input from entity: %s\n", entity->name);
2010 if (entity->content == NULL) {
2011 switch (entity->etype) {
2012 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2013 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2015 ctxt->sax->error(ctxt->userData,
2016 "xmlNewEntityInputStream unparsed entity !\n");
2017 break;
2018 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2019 case XML_EXTERNAL_PARAMETER_ENTITY:
2020 return(xmlLoadExternalEntity((char *) entity->URI,
2021 (char *) entity->ExternalID, ctxt));
2022 case XML_INTERNAL_GENERAL_ENTITY:
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData,
2025 "Internal entity %s without content !\n", entity->name);
2026 break;
2027 case XML_INTERNAL_PARAMETER_ENTITY:
2028 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2030 ctxt->sax->error(ctxt->userData,
2031 "Internal parameter entity %s without content !\n", entity->name);
2032 break;
2033 case XML_INTERNAL_PREDEFINED_ENTITY:
2034 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2036 ctxt->sax->error(ctxt->userData,
2037 "Predefined entity %s without content !\n", entity->name);
2038 break;
2039 }
2040 return(NULL);
2041 }
2042 input = xmlNewInputStream(ctxt);
2043 if (input == NULL) {
2044 return(NULL);
2045 }
2046 input->filename = (char *) entity->URI;
2047 input->base = entity->content;
2048 input->cur = entity->content;
2049 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002050 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002051 return(input);
2052}
2053
2054/**
2055 * xmlNewStringInputStream:
2056 * @ctxt: an XML parser context
2057 * @buffer: an memory buffer
2058 *
2059 * Create a new input stream based on a memory buffer.
2060 * Returns the new input stream
2061 */
2062xmlParserInputPtr
2063xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2064 xmlParserInputPtr input;
2065
2066 if (buffer == NULL) {
2067 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "internal: xmlNewStringInputStream string = NULL\n");
2071 return(NULL);
2072 }
2073 if (xmlParserDebugEntities)
2074 xmlGenericError(xmlGenericErrorContext,
2075 "new fixed input: %.30s\n", buffer);
2076 input = xmlNewInputStream(ctxt);
2077 if (input == NULL) {
2078 return(NULL);
2079 }
2080 input->base = buffer;
2081 input->cur = buffer;
2082 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002083 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002084 return(input);
2085}
2086
2087/**
2088 * xmlNewInputFromFile:
2089 * @ctxt: an XML parser context
2090 * @filename: the filename to use as entity
2091 *
2092 * Create a new input stream based on a file.
2093 *
2094 * Returns the new input stream or NULL in case of error
2095 */
2096xmlParserInputPtr
2097xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2098 xmlParserInputBufferPtr buf;
2099 xmlParserInputPtr inputStream;
2100 char *directory = NULL;
2101 xmlChar *URI = NULL;
2102
2103 if (xmlParserDebugEntities)
2104 xmlGenericError(xmlGenericErrorContext,
2105 "new input from file: %s\n", filename);
2106 if (ctxt == NULL) return(NULL);
2107 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2108 if (buf == NULL)
2109 return(NULL);
2110
2111 URI = xmlStrdup((xmlChar *) filename);
2112 directory = xmlParserGetDirectory((const char *) URI);
2113
2114 inputStream = xmlNewInputStream(ctxt);
2115 if (inputStream == NULL) {
2116 if (directory != NULL) xmlFree((char *) directory);
2117 if (URI != NULL) xmlFree((char *) URI);
2118 return(NULL);
2119 }
2120
2121 inputStream->filename = (const char *) URI;
2122 inputStream->directory = directory;
2123 inputStream->buf = buf;
2124
2125 inputStream->base = inputStream->buf->buffer->content;
2126 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002127 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002128 if ((ctxt->directory == NULL) && (directory != NULL))
2129 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2130 return(inputStream);
2131}
2132
2133/************************************************************************
2134 * *
2135 * Commodity functions to handle parser contexts *
2136 * *
2137 ************************************************************************/
2138
2139/**
2140 * xmlInitParserCtxt:
2141 * @ctxt: an XML parser context
2142 *
2143 * Initialize a parser context
2144 */
2145
2146void
2147xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2148{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002149 if(ctxt==NULL) {
2150 xmlGenericError(xmlGenericErrorContext,
2151 "xmlInitParserCtxt: NULL context given\n");
2152 return;
2153 }
2154
Owen Taylor3473f882001-02-23 17:55:21 +00002155 xmlDefaultSAXHandlerInit();
2156
William M. Brack8b2c7f12002-11-22 05:07:29 +00002157 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2158 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002159 xmlGenericError(xmlGenericErrorContext,
2160 "xmlInitParserCtxt: out of memory\n");
2161 }
2162 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002163 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002164
2165 /* Allocate the Input stack */
2166 ctxt->inputTab = (xmlParserInputPtr *)
2167 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2168 if (ctxt->inputTab == NULL) {
2169 xmlGenericError(xmlGenericErrorContext,
2170 "xmlInitParserCtxt: out of memory\n");
2171 ctxt->inputNr = 0;
2172 ctxt->inputMax = 0;
2173 ctxt->input = NULL;
2174 return;
2175 }
2176 ctxt->inputNr = 0;
2177 ctxt->inputMax = 5;
2178 ctxt->input = NULL;
2179
2180 ctxt->version = NULL;
2181 ctxt->encoding = NULL;
2182 ctxt->standalone = -1;
2183 ctxt->hasExternalSubset = 0;
2184 ctxt->hasPErefs = 0;
2185 ctxt->html = 0;
2186 ctxt->external = 0;
2187 ctxt->instate = XML_PARSER_START;
2188 ctxt->token = 0;
2189 ctxt->directory = NULL;
2190
2191 /* Allocate the Node stack */
2192 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2193 if (ctxt->nodeTab == NULL) {
2194 xmlGenericError(xmlGenericErrorContext,
2195 "xmlInitParserCtxt: out of memory\n");
2196 ctxt->nodeNr = 0;
2197 ctxt->nodeMax = 0;
2198 ctxt->node = NULL;
2199 ctxt->inputNr = 0;
2200 ctxt->inputMax = 0;
2201 ctxt->input = NULL;
2202 return;
2203 }
2204 ctxt->nodeNr = 0;
2205 ctxt->nodeMax = 10;
2206 ctxt->node = NULL;
2207
2208 /* Allocate the Name stack */
2209 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2210 if (ctxt->nameTab == NULL) {
2211 xmlGenericError(xmlGenericErrorContext,
2212 "xmlInitParserCtxt: out of memory\n");
2213 ctxt->nodeNr = 0;
2214 ctxt->nodeMax = 0;
2215 ctxt->node = NULL;
2216 ctxt->inputNr = 0;
2217 ctxt->inputMax = 0;
2218 ctxt->input = NULL;
2219 ctxt->nameNr = 0;
2220 ctxt->nameMax = 0;
2221 ctxt->name = NULL;
2222 return;
2223 }
2224 ctxt->nameNr = 0;
2225 ctxt->nameMax = 10;
2226 ctxt->name = NULL;
2227
2228 /* Allocate the space stack */
2229 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2230 if (ctxt->spaceTab == NULL) {
2231 xmlGenericError(xmlGenericErrorContext,
2232 "xmlInitParserCtxt: out of memory\n");
2233 ctxt->nodeNr = 0;
2234 ctxt->nodeMax = 0;
2235 ctxt->node = NULL;
2236 ctxt->inputNr = 0;
2237 ctxt->inputMax = 0;
2238 ctxt->input = NULL;
2239 ctxt->nameNr = 0;
2240 ctxt->nameMax = 0;
2241 ctxt->name = NULL;
2242 ctxt->spaceNr = 0;
2243 ctxt->spaceMax = 0;
2244 ctxt->space = NULL;
2245 return;
2246 }
2247 ctxt->spaceNr = 1;
2248 ctxt->spaceMax = 10;
2249 ctxt->spaceTab[0] = -1;
2250 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002251 ctxt->userData = ctxt;
2252 ctxt->myDoc = NULL;
2253 ctxt->wellFormed = 1;
2254 ctxt->valid = 1;
2255 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2256 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2257 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002258 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002259 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002260 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002261 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002262
Owen Taylor3473f882001-02-23 17:55:21 +00002263 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002264 ctxt->vctxt.error = xmlParserValidityError;
2265 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002267 if (xmlGetWarningsDefaultValue == 0)
2268 ctxt->vctxt.warning = NULL;
2269 else
2270 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002271 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002272 }
2273 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2274 ctxt->record_info = 0;
2275 ctxt->nbChars = 0;
2276 ctxt->checkIndex = 0;
2277 ctxt->inSubset = 0;
2278 ctxt->errNo = XML_ERR_OK;
2279 ctxt->depth = 0;
2280 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002281 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002282 xmlInitNodeInfoSeq(&ctxt->node_seq);
2283}
2284
2285/**
2286 * xmlFreeParserCtxt:
2287 * @ctxt: an XML parser context
2288 *
2289 * Free all the memory used by a parser context. However the parsed
2290 * document in ctxt->myDoc is not freed.
2291 */
2292
2293void
2294xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2295{
2296 xmlParserInputPtr input;
2297 xmlChar *oldname;
2298
2299 if (ctxt == NULL) return;
2300
2301 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2302 xmlFreeInputStream(input);
2303 }
2304 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2305 xmlFree(oldname);
2306 }
2307 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2308 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2309 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2310 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2311 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2312 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2313 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2314 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2315 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002316 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2317 xmlFree(ctxt->sax);
2318 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002319 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002320#ifdef LIBXML_CATALOG_ENABLED
2321 if (ctxt->catalogs != NULL)
2322 xmlCatalogFreeLocal(ctxt->catalogs);
2323#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002324 xmlFree(ctxt);
2325}
2326
2327/**
2328 * xmlNewParserCtxt:
2329 *
2330 * Allocate and initialize a new parser context.
2331 *
2332 * Returns the xmlParserCtxtPtr or NULL
2333 */
2334
2335xmlParserCtxtPtr
2336xmlNewParserCtxt()
2337{
2338 xmlParserCtxtPtr ctxt;
2339
2340 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2341 if (ctxt == NULL) {
2342 xmlGenericError(xmlGenericErrorContext,
2343 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002344 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002345 return(NULL);
2346 }
2347 memset(ctxt, 0, sizeof(xmlParserCtxt));
2348 xmlInitParserCtxt(ctxt);
2349 return(ctxt);
2350}
2351
2352/************************************************************************
2353 * *
2354 * Handling of node informations *
2355 * *
2356 ************************************************************************/
2357
2358/**
2359 * xmlClearParserCtxt:
2360 * @ctxt: an XML parser context
2361 *
2362 * Clear (release owned resources) and reinitialize a parser context
2363 */
2364
2365void
2366xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2367{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002368 if (ctxt==NULL)
2369 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002370 xmlClearNodeInfoSeq(&ctxt->node_seq);
2371 xmlInitParserCtxt(ctxt);
2372}
2373
2374/**
2375 * xmlParserFindNodeInfo:
2376 * @ctxt: an XML parser context
2377 * @node: an XML node within the tree
2378 *
2379 * Find the parser node info struct for a given node
2380 *
2381 * Returns an xmlParserNodeInfo block pointer or NULL
2382 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002383const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2384 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002385{
2386 unsigned long pos;
2387
2388 /* Find position where node should be at */
2389 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002390 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002391 return &ctx->node_seq.buffer[pos];
2392 else
2393 return NULL;
2394}
2395
2396
2397/**
2398 * xmlInitNodeInfoSeq:
2399 * @seq: a node info sequence pointer
2400 *
2401 * -- Initialize (set to initial state) node info sequence
2402 */
2403void
2404xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2405{
2406 seq->length = 0;
2407 seq->maximum = 0;
2408 seq->buffer = NULL;
2409}
2410
2411/**
2412 * xmlClearNodeInfoSeq:
2413 * @seq: a node info sequence pointer
2414 *
2415 * -- Clear (release memory and reinitialize) node
2416 * info sequence
2417 */
2418void
2419xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2420{
2421 if ( seq->buffer != NULL )
2422 xmlFree(seq->buffer);
2423 xmlInitNodeInfoSeq(seq);
2424}
2425
2426
2427/**
2428 * xmlParserFindNodeInfoIndex:
2429 * @seq: a node info sequence pointer
2430 * @node: an XML node pointer
2431 *
2432 *
2433 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2434 * the given node is or should be at in a sorted sequence
2435 *
2436 * Returns a long indicating the position of the record
2437 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002438unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2439 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002440{
2441 unsigned long upper, lower, middle;
2442 int found = 0;
2443
2444 /* Do a binary search for the key */
2445 lower = 1;
2446 upper = seq->length;
2447 middle = 0;
2448 while ( lower <= upper && !found) {
2449 middle = lower + (upper - lower) / 2;
2450 if ( node == seq->buffer[middle - 1].node )
2451 found = 1;
2452 else if ( node < seq->buffer[middle - 1].node )
2453 upper = middle - 1;
2454 else
2455 lower = middle + 1;
2456 }
2457
2458 /* Return position */
2459 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2460 return middle;
2461 else
2462 return middle - 1;
2463}
2464
2465
2466/**
2467 * xmlParserAddNodeInfo:
2468 * @ctxt: an XML parser context
2469 * @info: a node info sequence pointer
2470 *
2471 * Insert node info record into the sorted sequence
2472 */
2473void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002474xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002475 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002476{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002477 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002478
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002479 /* Find pos and check to see if node is already in the sequence */
2480 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2481 info->node);
2482 if (pos < ctxt->node_seq.length
2483 && ctxt->node_seq.buffer[pos].node == info->node) {
2484 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002485 }
2486
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002487 /* Otherwise, we need to add new node to buffer */
2488 else {
2489 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2490 xmlParserNodeInfo *tmp_buffer;
2491 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002492
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002493 if (ctxt->node_seq.maximum == 0)
2494 ctxt->node_seq.maximum = 2;
2495 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2496 (2 * ctxt->node_seq.maximum));
2497
2498 if (ctxt->node_seq.buffer == NULL)
2499 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2500 else
2501 tmp_buffer =
2502 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2503 byte_size);
2504
2505 if (tmp_buffer == NULL) {
2506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2507 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2508 ctxt->errNo = XML_ERR_NO_MEMORY;
2509 return;
2510 }
2511 ctxt->node_seq.buffer = tmp_buffer;
2512 ctxt->node_seq.maximum *= 2;
2513 }
2514
2515 /* If position is not at end, move elements out of the way */
2516 if (pos != ctxt->node_seq.length) {
2517 unsigned long i;
2518
2519 for (i = ctxt->node_seq.length; i > pos; i--)
2520 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2521 }
2522
2523 /* Copy element and increase length */
2524 ctxt->node_seq.buffer[pos] = *info;
2525 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002526 }
Owen Taylor3473f882001-02-23 17:55:21 +00002527}
2528
2529/************************************************************************
2530 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002531 * Defaults settings *
2532 * *
2533 ************************************************************************/
2534/**
2535 * xmlPedanticParserDefault:
2536 * @val: int 0 or 1
2537 *
2538 * Set and return the previous value for enabling pedantic warnings.
2539 *
2540 * Returns the last value for 0 for no substitution, 1 for substitution.
2541 */
2542
2543int
2544xmlPedanticParserDefault(int val) {
2545 int old = xmlPedanticParserDefaultValue;
2546
2547 xmlPedanticParserDefaultValue = val;
2548 return(old);
2549}
2550
2551/**
2552 * xmlLineNumbersDefault:
2553 * @val: int 0 or 1
2554 *
2555 * Set and return the previous value for enabling line numbers in elements
2556 * contents. This may break on old application and is turned off by default.
2557 *
2558 * Returns the last value for 0 for no substitution, 1 for substitution.
2559 */
2560
2561int
2562xmlLineNumbersDefault(int val) {
2563 int old = xmlLineNumbersDefaultValue;
2564
2565 xmlLineNumbersDefaultValue = val;
2566 return(old);
2567}
2568
2569/**
2570 * xmlSubstituteEntitiesDefault:
2571 * @val: int 0 or 1
2572 *
2573 * Set and return the previous value for default entity support.
2574 * Initially the parser always keep entity references instead of substituting
2575 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002576 * default parser behavior
2577 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002578 * file basis.
2579 *
2580 * Returns the last value for 0 for no substitution, 1 for substitution.
2581 */
2582
2583int
2584xmlSubstituteEntitiesDefault(int val) {
2585 int old = xmlSubstituteEntitiesDefaultValue;
2586
2587 xmlSubstituteEntitiesDefaultValue = val;
2588 return(old);
2589}
2590
2591/**
2592 * xmlKeepBlanksDefault:
2593 * @val: int 0 or 1
2594 *
2595 * Set and return the previous value for default blanks text nodes support.
2596 * The 1.x version of the parser used an heuristic to try to detect
2597 * ignorable white spaces. As a result the SAX callback was generating
2598 * ignorableWhitespace() callbacks instead of characters() one, and when
2599 * using the DOM output text nodes containing those blanks were not generated.
2600 * The 2.x and later version will switch to the XML standard way and
2601 * ignorableWhitespace() are only generated when running the parser in
2602 * validating mode and when the current element doesn't allow CDATA or
2603 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002604 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002605 * on 1.X libs and to switch back to the old mode for compatibility when
2606 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2607 * by using xmlIsBlankNode() commodity function to detect the "empty"
2608 * nodes generated.
2609 * This value also affect autogeneration of indentation when saving code
2610 * if blanks sections are kept, indentation is not generated.
2611 *
2612 * Returns the last value for 0 for no substitution, 1 for substitution.
2613 */
2614
2615int
2616xmlKeepBlanksDefault(int val) {
2617 int old = xmlKeepBlanksDefaultValue;
2618
2619 xmlKeepBlanksDefaultValue = val;
2620 xmlIndentTreeOutput = !val;
2621 return(old);
2622}
2623
2624/************************************************************************
2625 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002626 * Deprecated functions kept for compatibility *
2627 * *
2628 ************************************************************************/
2629
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002630/**
2631 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002632 * @lang: pointer to the string value
2633 *
2634 * Checks that the value conforms to the LanguageID production:
2635 *
2636 * NOTE: this is somewhat deprecated, those productions were removed from
2637 * the XML Second edition.
2638 *
2639 * [33] LanguageID ::= Langcode ('-' Subcode)*
2640 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2641 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2642 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2643 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2644 * [38] Subcode ::= ([a-z] | [A-Z])+
2645 *
2646 * Returns 1 if correct 0 otherwise
2647 **/
2648int
2649xmlCheckLanguageID(const xmlChar *lang) {
2650 const xmlChar *cur = lang;
2651
2652 if (cur == NULL)
2653 return(0);
2654 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2655 ((cur[0] == 'I') && (cur[1] == '-'))) {
2656 /*
2657 * IANA code
2658 */
2659 cur += 2;
2660 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2661 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2662 cur++;
2663 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2664 ((cur[0] == 'X') && (cur[1] == '-'))) {
2665 /*
2666 * User code
2667 */
2668 cur += 2;
2669 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2670 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2671 cur++;
2672 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2673 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2674 /*
2675 * ISO639
2676 */
2677 cur++;
2678 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2679 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2680 cur++;
2681 else
2682 return(0);
2683 } else
2684 return(0);
2685 while (cur[0] != 0) { /* non input consuming */
2686 if (cur[0] != '-')
2687 return(0);
2688 cur++;
2689 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2690 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2691 cur++;
2692 else
2693 return(0);
2694 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2695 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2696 cur++;
2697 }
2698 return(1);
2699}
2700
2701/**
2702 * xmlDecodeEntities:
2703 * @ctxt: the parser context
2704 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2705 * @len: the len to decode (in bytes !), -1 for no size limit
2706 * @end: an end marker xmlChar, 0 if none
2707 * @end2: an end marker xmlChar, 0 if none
2708 * @end3: an end marker xmlChar, 0 if none
2709 *
2710 * This function is deprecated, we now always process entities content
2711 * through xmlStringDecodeEntities
2712 *
2713 * TODO: remove it in next major release.
2714 *
2715 * [67] Reference ::= EntityRef | CharRef
2716 *
2717 * [69] PEReference ::= '%' Name ';'
2718 *
2719 * Returns A newly allocated string with the substitution done. The caller
2720 * must deallocate it !
2721 */
2722xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002723xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2724 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002725#if 0
2726 xmlChar *buffer = NULL;
2727 unsigned int buffer_size = 0;
2728 unsigned int nbchars = 0;
2729
2730 xmlChar *current = NULL;
2731 xmlEntityPtr ent;
2732 unsigned int max = (unsigned int) len;
2733 int c,l;
2734#endif
2735
2736 static int deprecated = 0;
2737 if (!deprecated) {
2738 xmlGenericError(xmlGenericErrorContext,
2739 "xmlDecodeEntities() deprecated function reached\n");
2740 deprecated = 1;
2741 }
2742
2743#if 0
2744 if (ctxt->depth > 40) {
2745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2746 ctxt->sax->error(ctxt->userData,
2747 "Detected entity reference loop\n");
2748 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002750 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2751 return(NULL);
2752 }
2753
2754 /*
2755 * allocate a translation buffer.
2756 */
2757 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2758 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2759 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002760 xmlGenericError(xmlGenericErrorContext,
2761 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002762 return(NULL);
2763 }
2764
2765 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002766 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002767 */
2768 GROW;
2769 c = CUR_CHAR(l);
2770 while ((nbchars < max) && (c != end) && /* NOTUSED */
2771 (c != end2) && (c != end3)) {
2772 GROW;
2773 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002774 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002775 int val = xmlParseCharRef(ctxt);
2776 COPY_BUF(0,buffer,nbchars,val);
2777 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002778 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002779 (what & XML_SUBSTITUTE_REF)) {
2780 if (xmlParserDebugEntities)
2781 xmlGenericError(xmlGenericErrorContext,
2782 "decoding Entity Reference\n");
2783 ent = xmlParseEntityRef(ctxt);
2784 if ((ent != NULL) &&
2785 (ctxt->replaceEntities != 0)) {
2786 current = ent->content;
2787 while (*current != 0) { /* non input consuming loop */
2788 buffer[nbchars++] = *current++;
2789 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2790 growBuffer(buffer);
2791 }
2792 }
2793 } else if (ent != NULL) {
2794 const xmlChar *cur = ent->name;
2795
2796 buffer[nbchars++] = '&';
2797 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2798 growBuffer(buffer);
2799 }
2800 while (*cur != 0) { /* non input consuming loop */
2801 buffer[nbchars++] = *cur++;
2802 }
2803 buffer[nbchars++] = ';';
2804 }
2805 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2806 /*
2807 * a PEReference induce to switch the entity flow,
2808 * we break here to flush the current set of chars
2809 * parsed if any. We will be called back later.
2810 */
2811 if (xmlParserDebugEntities)
2812 xmlGenericError(xmlGenericErrorContext,
2813 "decoding PE Reference\n");
2814 if (nbchars != 0) break;
2815
2816 xmlParsePEReference(ctxt);
2817
2818 /*
2819 * Pop-up of finished entities.
2820 */
2821 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2822 xmlPopInput(ctxt);
2823
2824 break;
2825 } else {
2826 COPY_BUF(l,buffer,nbchars,c);
2827 NEXTL(l);
2828 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2829 growBuffer(buffer);
2830 }
2831 }
2832 c = CUR_CHAR(l);
2833 }
2834 buffer[nbchars++] = 0;
2835 return(buffer);
2836#endif
2837 return(NULL);
2838}
2839
2840/**
2841 * xmlNamespaceParseNCName:
2842 * @ctxt: an XML parser context
2843 *
2844 * parse an XML namespace name.
2845 *
2846 * TODO: this seems not in use anymore, the namespace handling is done on
2847 * top of the SAX interfaces, i.e. not on raw input.
2848 *
2849 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2850 *
2851 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2852 * CombiningChar | Extender
2853 *
2854 * Returns the namespace name or NULL
2855 */
2856
2857xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002858xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002859#if 0
2860 xmlChar buf[XML_MAX_NAMELEN + 5];
2861 int len = 0, l;
2862 int cur = CUR_CHAR(l);
2863#endif
2864
2865 static int deprecated = 0;
2866 if (!deprecated) {
2867 xmlGenericError(xmlGenericErrorContext,
2868 "xmlNamespaceParseNCName() deprecated function reached\n");
2869 deprecated = 1;
2870 }
2871
2872#if 0
2873 /* load first the value of the char !!! */
2874 GROW;
2875 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2876
2877xmlGenericError(xmlGenericErrorContext,
2878 "xmlNamespaceParseNCName: reached loop 3\n");
2879 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2880 (cur == '.') || (cur == '-') ||
2881 (cur == '_') ||
2882 (IS_COMBINING(cur)) ||
2883 (IS_EXTENDER(cur))) {
2884 COPY_BUF(l,buf,len,cur);
2885 NEXTL(l);
2886 cur = CUR_CHAR(l);
2887 if (len >= XML_MAX_NAMELEN) {
2888 xmlGenericError(xmlGenericErrorContext,
2889 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2890 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2891 (cur == '.') || (cur == '-') ||
2892 (cur == '_') ||
2893 (IS_COMBINING(cur)) ||
2894 (IS_EXTENDER(cur))) {
2895 NEXTL(l);
2896 cur = CUR_CHAR(l);
2897 }
2898 break;
2899 }
2900 }
2901 return(xmlStrndup(buf, len));
2902#endif
2903 return(NULL);
2904}
2905
2906/**
2907 * xmlNamespaceParseQName:
2908 * @ctxt: an XML parser context
2909 * @prefix: a xmlChar **
2910 *
2911 * TODO: this seems not in use anymore, the namespace handling is done on
2912 * top of the SAX interfaces, i.e. not on raw input.
2913 *
2914 * parse an XML qualified name
2915 *
2916 * [NS 5] QName ::= (Prefix ':')? LocalPart
2917 *
2918 * [NS 6] Prefix ::= NCName
2919 *
2920 * [NS 7] LocalPart ::= NCName
2921 *
2922 * Returns the local part, and prefix is updated
2923 * to get the Prefix if any.
2924 */
2925
2926xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002927xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002928
2929 static int deprecated = 0;
2930 if (!deprecated) {
2931 xmlGenericError(xmlGenericErrorContext,
2932 "xmlNamespaceParseQName() deprecated function reached\n");
2933 deprecated = 1;
2934 }
2935
2936#if 0
2937 xmlChar *ret = NULL;
2938
2939 *prefix = NULL;
2940 ret = xmlNamespaceParseNCName(ctxt);
2941 if (RAW == ':') {
2942 *prefix = ret;
2943 NEXT;
2944 ret = xmlNamespaceParseNCName(ctxt);
2945 }
2946
2947 return(ret);
2948#endif
2949 return(NULL);
2950}
2951
2952/**
2953 * xmlNamespaceParseNSDef:
2954 * @ctxt: an XML parser context
2955 *
2956 * parse a namespace prefix declaration
2957 *
2958 * TODO: this seems not in use anymore, the namespace handling is done on
2959 * top of the SAX interfaces, i.e. not on raw input.
2960 *
2961 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2962 *
2963 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2964 *
2965 * Returns the namespace name
2966 */
2967
2968xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002969xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002970 static int deprecated = 0;
2971 if (!deprecated) {
2972 xmlGenericError(xmlGenericErrorContext,
2973 "xmlNamespaceParseNSDef() deprecated function reached\n");
2974 deprecated = 1;
2975 }
2976 return(NULL);
2977#if 0
2978 xmlChar *name = NULL;
2979
2980 if ((RAW == 'x') && (NXT(1) == 'm') &&
2981 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2982 (NXT(4) == 's')) {
2983 SKIP(5);
2984 if (RAW == ':') {
2985 NEXT;
2986 name = xmlNamespaceParseNCName(ctxt);
2987 }
2988 }
2989 return(name);
2990#endif
2991}
2992
2993/**
2994 * xmlParseQuotedString:
2995 * @ctxt: an XML parser context
2996 *
2997 * Parse and return a string between quotes or doublequotes
2998 *
2999 * TODO: Deprecated, to be removed at next drop of binary compatibility
3000 *
3001 * Returns the string parser or NULL.
3002 */
3003xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003004xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003005 static int deprecated = 0;
3006 if (!deprecated) {
3007 xmlGenericError(xmlGenericErrorContext,
3008 "xmlParseQuotedString() deprecated function reached\n");
3009 deprecated = 1;
3010 }
3011 return(NULL);
3012
3013#if 0
3014 xmlChar *buf = NULL;
3015 int len = 0,l;
3016 int size = XML_PARSER_BUFFER_SIZE;
3017 int c;
3018
3019 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3020 if (buf == NULL) {
3021 xmlGenericError(xmlGenericErrorContext,
3022 "malloc of %d byte failed\n", size);
3023 return(NULL);
3024 }
3025xmlGenericError(xmlGenericErrorContext,
3026 "xmlParseQuotedString: reached loop 4\n");
3027 if (RAW == '"') {
3028 NEXT;
3029 c = CUR_CHAR(l);
3030 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3031 if (len + 5 >= size) {
3032 size *= 2;
3033 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3034 if (buf == NULL) {
3035 xmlGenericError(xmlGenericErrorContext,
3036 "realloc of %d byte failed\n", size);
3037 return(NULL);
3038 }
3039 }
3040 COPY_BUF(l,buf,len,c);
3041 NEXTL(l);
3042 c = CUR_CHAR(l);
3043 }
3044 if (c != '"') {
3045 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047 ctxt->sax->error(ctxt->userData,
3048 "String not closed \"%.50s\"\n", buf);
3049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003051 } else {
3052 NEXT;
3053 }
3054 } else if (RAW == '\''){
3055 NEXT;
3056 c = CUR;
3057 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3058 if (len + 1 >= size) {
3059 size *= 2;
3060 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3061 if (buf == NULL) {
3062 xmlGenericError(xmlGenericErrorContext,
3063 "realloc of %d byte failed\n", size);
3064 return(NULL);
3065 }
3066 }
3067 buf[len++] = c;
3068 NEXT;
3069 c = CUR;
3070 }
3071 if (RAW != '\'') {
3072 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3074 ctxt->sax->error(ctxt->userData,
3075 "String not closed \"%.50s\"\n", buf);
3076 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003078 } else {
3079 NEXT;
3080 }
3081 }
3082 return(buf);
3083#endif
3084}
3085
3086/**
3087 * xmlParseNamespace:
3088 * @ctxt: an XML parser context
3089 *
3090 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3091 *
3092 * This is what the older xml-name Working Draft specified, a bunch of
3093 * other stuff may still rely on it, so support is still here as
3094 * if it was declared on the root of the Tree:-(
3095 *
3096 * TODO: remove from library
3097 *
3098 * To be removed at next drop of binary compatibility
3099 */
3100
3101void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003102xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003103 static int deprecated = 0;
3104 if (!deprecated) {
3105 xmlGenericError(xmlGenericErrorContext,
3106 "xmlParseNamespace() deprecated function reached\n");
3107 deprecated = 1;
3108 }
3109
3110#if 0
3111 xmlChar *href = NULL;
3112 xmlChar *prefix = NULL;
3113 int garbage = 0;
3114
3115 /*
3116 * We just skipped "namespace" or "xml:namespace"
3117 */
3118 SKIP_BLANKS;
3119
3120xmlGenericError(xmlGenericErrorContext,
3121 "xmlParseNamespace: reached loop 5\n");
3122 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3123 /*
3124 * We can have "ns" or "prefix" attributes
3125 * Old encoding as 'href' or 'AS' attributes is still supported
3126 */
3127 if ((RAW == 'n') && (NXT(1) == 's')) {
3128 garbage = 0;
3129 SKIP(2);
3130 SKIP_BLANKS;
3131
3132 if (RAW != '=') continue;
3133 NEXT;
3134 SKIP_BLANKS;
3135
3136 href = xmlParseQuotedString(ctxt);
3137 SKIP_BLANKS;
3138 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3139 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3140 garbage = 0;
3141 SKIP(4);
3142 SKIP_BLANKS;
3143
3144 if (RAW != '=') continue;
3145 NEXT;
3146 SKIP_BLANKS;
3147
3148 href = xmlParseQuotedString(ctxt);
3149 SKIP_BLANKS;
3150 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3151 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3152 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3153 garbage = 0;
3154 SKIP(6);
3155 SKIP_BLANKS;
3156
3157 if (RAW != '=') continue;
3158 NEXT;
3159 SKIP_BLANKS;
3160
3161 prefix = xmlParseQuotedString(ctxt);
3162 SKIP_BLANKS;
3163 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3164 garbage = 0;
3165 SKIP(2);
3166 SKIP_BLANKS;
3167
3168 if (RAW != '=') continue;
3169 NEXT;
3170 SKIP_BLANKS;
3171
3172 prefix = xmlParseQuotedString(ctxt);
3173 SKIP_BLANKS;
3174 } else if ((RAW == '?') && (NXT(1) == '>')) {
3175 garbage = 0;
3176 NEXT;
3177 } else {
3178 /*
3179 * Found garbage when parsing the namespace
3180 */
3181 if (!garbage) {
3182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3183 ctxt->sax->error(ctxt->userData,
3184 "xmlParseNamespace found garbage\n");
3185 }
3186 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3187 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003188 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003189 NEXT;
3190 }
3191 }
3192
3193 MOVETO_ENDTAG(CUR_PTR);
3194 NEXT;
3195
3196 /*
3197 * Register the DTD.
3198 if (href != NULL)
3199 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3200 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3201 */
3202
3203 if (prefix != NULL) xmlFree(prefix);
3204 if (href != NULL) xmlFree(href);
3205#endif
3206}
3207
3208/**
3209 * xmlScanName:
3210 * @ctxt: an XML parser context
3211 *
3212 * Trickery: parse an XML name but without consuming the input flow
3213 * Needed for rollback cases. Used only when parsing entities references.
3214 *
3215 * TODO: seems deprecated now, only used in the default part of
3216 * xmlParserHandleReference
3217 *
3218 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3219 * CombiningChar | Extender
3220 *
3221 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3222 *
3223 * [6] Names ::= Name (S Name)*
3224 *
3225 * Returns the Name parsed or NULL
3226 */
3227
3228xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003229xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003230 static int deprecated = 0;
3231 if (!deprecated) {
3232 xmlGenericError(xmlGenericErrorContext,
3233 "xmlScanName() deprecated function reached\n");
3234 deprecated = 1;
3235 }
3236 return(NULL);
3237
3238#if 0
3239 xmlChar buf[XML_MAX_NAMELEN];
3240 int len = 0;
3241
3242 GROW;
3243 if (!IS_LETTER(RAW) && (RAW != '_') &&
3244 (RAW != ':')) {
3245 return(NULL);
3246 }
3247
3248
3249 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3250 (NXT(len) == '.') || (NXT(len) == '-') ||
3251 (NXT(len) == '_') || (NXT(len) == ':') ||
3252 (IS_COMBINING(NXT(len))) ||
3253 (IS_EXTENDER(NXT(len)))) {
3254 GROW;
3255 buf[len] = NXT(len);
3256 len++;
3257 if (len >= XML_MAX_NAMELEN) {
3258 xmlGenericError(xmlGenericErrorContext,
3259 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3260 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3261 (IS_DIGIT(NXT(len))) ||
3262 (NXT(len) == '.') || (NXT(len) == '-') ||
3263 (NXT(len) == '_') || (NXT(len) == ':') ||
3264 (IS_COMBINING(NXT(len))) ||
3265 (IS_EXTENDER(NXT(len))))
3266 len++;
3267 break;
3268 }
3269 }
3270 return(xmlStrndup(buf, len));
3271#endif
3272}
3273
3274/**
3275 * xmlParserHandleReference:
3276 * @ctxt: the parser context
3277 *
3278 * TODO: Remove, now deprecated ... the test is done directly in the
3279 * content parsing
3280 * routines.
3281 *
3282 * [67] Reference ::= EntityRef | CharRef
3283 *
3284 * [68] EntityRef ::= '&' Name ';'
3285 *
3286 * [ WFC: Entity Declared ]
3287 * the Name given in the entity reference must match that in an entity
3288 * declaration, except that well-formed documents need not declare any
3289 * of the following entities: amp, lt, gt, apos, quot.
3290 *
3291 * [ WFC: Parsed Entity ]
3292 * An entity reference must not contain the name of an unparsed entity
3293 *
3294 * [66] CharRef ::= '&#' [0-9]+ ';' |
3295 * '&#x' [0-9a-fA-F]+ ';'
3296 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003297 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003298 * the handling is done accordingly to
3299 * http://www.w3.org/TR/REC-xml#entproc
3300 */
3301void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003302xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003303 static int deprecated = 0;
3304 if (!deprecated) {
3305 xmlGenericError(xmlGenericErrorContext,
3306 "xmlParserHandleReference() deprecated function reached\n");
3307 deprecated = 1;
3308 }
3309
Owen Taylor3473f882001-02-23 17:55:21 +00003310 return;
3311}
3312
3313/**
3314 * xmlHandleEntity:
3315 * @ctxt: an XML parser context
3316 * @entity: an XML entity pointer.
3317 *
3318 * Default handling of defined entities, when should we define a new input
3319 * stream ? When do we just handle that as a set of chars ?
3320 *
3321 * OBSOLETE: to be removed at some point.
3322 */
3323
3324void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003325xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003326 static int deprecated = 0;
3327 if (!deprecated) {
3328 xmlGenericError(xmlGenericErrorContext,
3329 "xmlHandleEntity() deprecated function reached\n");
3330 deprecated = 1;
3331 }
3332
3333#if 0
3334 int len;
3335 xmlParserInputPtr input;
3336
3337 if (entity->content == NULL) {
3338 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3341 entity->name);
3342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 return;
3345 }
3346 len = xmlStrlen(entity->content);
3347 if (len <= 2) goto handle_as_char;
3348
3349 /*
3350 * Redefine its content as an input stream.
3351 */
3352 input = xmlNewEntityInputStream(ctxt, entity);
3353 xmlPushInput(ctxt, input);
3354 return;
3355
3356handle_as_char:
3357 /*
3358 * Just handle the content as a set of chars.
3359 */
3360 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3361 (ctxt->sax->characters != NULL))
3362 ctxt->sax->characters(ctxt->userData, entity->content, len);
3363#endif
3364}
3365
3366/**
3367 * xmlNewGlobalNs:
3368 * @doc: the document carrying the namespace
3369 * @href: the URI associated
3370 * @prefix: the prefix for the namespace
3371 *
3372 * Creation of a Namespace, the old way using PI and without scoping
3373 * DEPRECATED !!!
3374 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003375 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003376 */
3377xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003378xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3379 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003380 static int deprecated = 0;
3381 if (!deprecated) {
3382 xmlGenericError(xmlGenericErrorContext,
3383 "xmlNewGlobalNs() deprecated function reached\n");
3384 deprecated = 1;
3385 }
3386 return(NULL);
3387#if 0
3388 xmlNodePtr root;
3389
3390 xmlNsPtr cur;
3391
3392 root = xmlDocGetRootElement(doc);
3393 if (root != NULL)
3394 return(xmlNewNs(root, href, prefix));
3395
3396 /*
3397 * if there is no root element yet, create an old Namespace type
3398 * and it will be moved to the root at save time.
3399 */
3400 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3401 if (cur == NULL) {
3402 xmlGenericError(xmlGenericErrorContext,
3403 "xmlNewGlobalNs : malloc failed\n");
3404 return(NULL);
3405 }
3406 memset(cur, 0, sizeof(xmlNs));
3407 cur->type = XML_GLOBAL_NAMESPACE;
3408
3409 if (href != NULL)
3410 cur->href = xmlStrdup(href);
3411 if (prefix != NULL)
3412 cur->prefix = xmlStrdup(prefix);
3413
3414 /*
3415 * Add it at the end to preserve parsing order ...
3416 */
3417 if (doc != NULL) {
3418 if (doc->oldNs == NULL) {
3419 doc->oldNs = cur;
3420 } else {
3421 xmlNsPtr prev = doc->oldNs;
3422
3423 while (prev->next != NULL) prev = prev->next;
3424 prev->next = cur;
3425 }
3426 }
3427
3428 return(NULL);
3429#endif
3430}
3431
3432/**
3433 * xmlUpgradeOldNs:
3434 * @doc: a document pointer
3435 *
3436 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3437 * DEPRECATED
3438 */
3439void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003440xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003441 static int deprecated = 0;
3442 if (!deprecated) {
3443 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003444 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003445 deprecated = 1;
3446 }
3447#if 0
3448 xmlNsPtr cur;
3449
3450 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3451 if (doc->children == NULL) {
3452#ifdef DEBUG_TREE
3453 xmlGenericError(xmlGenericErrorContext,
3454 "xmlUpgradeOldNs: failed no root !\n");
3455#endif
3456 return;
3457 }
3458
3459 cur = doc->oldNs;
3460 while (cur->next != NULL) {
3461 cur->type = XML_LOCAL_NAMESPACE;
3462 cur = cur->next;
3463 }
3464 cur->type = XML_LOCAL_NAMESPACE;
3465 cur->next = doc->children->nsDef;
3466 doc->children->nsDef = doc->oldNs;
3467 doc->oldNs = NULL;
3468#endif
3469}
3470