blob: c09fc95d331827ce5d52bf70f9d627b2f0289bb0 [file] [log] [blame]
/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
/*
 * XML_DIR_SEP: directory separator character.
 * A backslash when WIN32 is defined and the build is not Cygwin,
 * a forward slash in every other configuration.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
/*
 * Various global defaults for parsing
 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned, which lets a caller size its array.
 *
 * Returns -1 in case of error (*@len is negative or >= 1000), or the
 *         total number of features.  *@len is updated with the number of
 *         strings copied.  The strings point into a static table and must
 *         not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    int total = (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int i;

    /* query mode: report the table size only */
    if ((len == NULL) || (result == NULL))
        return(total);

    if ((*len < 0) || (*len >= 1000))
        return(-1);

    /* never copy more entries than the table holds */
    if (*len > total)
        *len = total;

    for (i = 0; i < *len; i++)
        result[i] = xmlFeaturesList[i];

    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* the three control characters that are allowed */
    if ((c == 0x09) || (c == 0x0A) || (c == 0x0D))
        return(1);
    /* Basic Multilingual Plane up to the surrogate blocks */
    if ((c >= 0x20) && (c <= 0xD7FF))
        return(1);
    /* rest of the BMP, without FFFE and FFFF */
    if ((c >= 0xE000) && (c <= 0xFFFD))
        return(1);
    /* supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
399
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:	/* space */
        case 0x09:	/* tab */
        case 0x0A:	/* line feed */
        case 0x0D:	/* carriage return */
            return(1);
        default:
            return(0);
    }
}
414
/*
 * BaseChar membership for the code points 0x00 - 0xFF, one entry per
 * code point (1 = BaseChar, 0 = not).  Used by xmlIsBaseChar() so the
 * most common characters are answered with a single table lookup.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are looked up in xmlBaseArray.  The remaining
 * ranges are tested in three ascending tiers ("accelerators"): once a
 * tier has failed, a value below the first code point of the next tier
 * cannot match anything and is rejected without further comparisons.
 * Negative values are never a BaseChar.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* a negative value must not be used as an index into the table */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* tier 1: 0x0100 - 0x06E6 */
    if (((c >= 0x0100) && (c <= 0x0131)) ||
        ((c >= 0x0134) && (c <= 0x013E)) ||
        ((c >= 0x0141) && (c <= 0x0148)) ||
        ((c >= 0x014A) && (c <= 0x017E)) ||
        ((c >= 0x0180) && (c <= 0x01C3)) ||
        ((c >= 0x01CD) && (c <= 0x01F0)) ||
        ((c >= 0x01F4) && (c <= 0x01F5)) ||
        ((c >= 0x01FA) && (c <= 0x0217)) ||
        ((c >= 0x0250) && (c <= 0x02A8)) ||
        ((c >= 0x02BB) && (c <= 0x02C1)) ||
        (c == 0x0386) ||
        ((c >= 0x0388) && (c <= 0x038A)) ||
        (c == 0x038C) ||
        ((c >= 0x038E) && (c <= 0x03A1)) ||
        ((c >= 0x03A3) && (c <= 0x03CE)) ||
        ((c >= 0x03D0) && (c <= 0x03D6)) ||
        (c == 0x03DA) ||
        (c == 0x03DC) ||
        (c == 0x03DE) ||
        (c == 0x03E0) ||
        ((c >= 0x03E2) && (c <= 0x03F3)) ||
        ((c >= 0x0401) && (c <= 0x040C)) ||
        ((c >= 0x040E) && (c <= 0x044F)) ||
        ((c >= 0x0451) && (c <= 0x045C)) ||
        ((c >= 0x045E) && (c <= 0x0481)) ||
        ((c >= 0x0490) && (c <= 0x04C4)) ||
        ((c >= 0x04C7) && (c <= 0x04C8)) ||
        ((c >= 0x04CB) && (c <= 0x04CC)) ||
        ((c >= 0x04D0) && (c <= 0x04EB)) ||
        ((c >= 0x04EE) && (c <= 0x04F5)) ||
        ((c >= 0x04F8) && (c <= 0x04F9)) ||
        ((c >= 0x0531) && (c <= 0x0556)) ||
        (c == 0x0559) ||
        ((c >= 0x0561) && (c <= 0x0586)) ||
        ((c >= 0x05D0) && (c <= 0x05EA)) ||
        ((c >= 0x05F0) && (c <= 0x05F2)) ||
        ((c >= 0x0621) && (c <= 0x063A)) ||
        ((c >= 0x0641) && (c <= 0x064A)) ||
        ((c >= 0x0671) && (c <= 0x06B7)) ||
        ((c >= 0x06BA) && (c <= 0x06BE)) ||
        ((c >= 0x06C0) && (c <= 0x06CE)) ||
        ((c >= 0x06D0) && (c <= 0x06D3)) ||
        (c == 0x06D5) ||
        ((c >= 0x06E5) && (c <= 0x06E6)))
        return(1);

    /* accelerator: nothing else matches below 0x0905 */
    if (c < 0x0905)
        return(0);

    /* tier 2: 0x0905 - 0x0F69 */
    if (((c >= 0x0905) && (c <= 0x0939)) ||
        (c == 0x093D) ||
        ((c >= 0x0958) && (c <= 0x0961)) ||
        ((c >= 0x0985) && (c <= 0x098C)) ||
        ((c >= 0x098F) && (c <= 0x0990)) ||
        ((c >= 0x0993) && (c <= 0x09A8)) ||
        ((c >= 0x09AA) && (c <= 0x09B0)) ||
        (c == 0x09B2) ||
        ((c >= 0x09B6) && (c <= 0x09B9)) ||
        ((c >= 0x09DC) && (c <= 0x09DD)) ||
        ((c >= 0x09DF) && (c <= 0x09E1)) ||
        ((c >= 0x09F0) && (c <= 0x09F1)) ||
        ((c >= 0x0A05) && (c <= 0x0A0A)) ||
        ((c >= 0x0A0F) && (c <= 0x0A10)) ||
        ((c >= 0x0A13) && (c <= 0x0A28)) ||
        ((c >= 0x0A2A) && (c <= 0x0A30)) ||
        ((c >= 0x0A32) && (c <= 0x0A33)) ||
        ((c >= 0x0A35) && (c <= 0x0A36)) ||
        ((c >= 0x0A38) && (c <= 0x0A39)) ||
        ((c >= 0x0A59) && (c <= 0x0A5C)) ||
        (c == 0x0A5E) ||
        ((c >= 0x0A72) && (c <= 0x0A74)) ||
        ((c >= 0x0A85) && (c <= 0x0A8B)) ||
        (c == 0x0A8D) ||
        ((c >= 0x0A8F) && (c <= 0x0A91)) ||
        ((c >= 0x0A93) && (c <= 0x0AA8)) ||
        ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
        ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
        ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
        (c == 0x0ABD) ||
        (c == 0x0AE0) ||
        ((c >= 0x0B05) && (c <= 0x0B0C)) ||
        ((c >= 0x0B0F) && (c <= 0x0B10)) ||
        ((c >= 0x0B13) && (c <= 0x0B28)) ||
        ((c >= 0x0B2A) && (c <= 0x0B30)) ||
        ((c >= 0x0B32) && (c <= 0x0B33)) ||
        ((c >= 0x0B36) && (c <= 0x0B39)) ||
        (c == 0x0B3D) ||
        ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
        ((c >= 0x0B5F) && (c <= 0x0B61)) ||
        ((c >= 0x0B85) && (c <= 0x0B8A)) ||
        ((c >= 0x0B8E) && (c <= 0x0B90)) ||
        ((c >= 0x0B92) && (c <= 0x0B95)) ||
        ((c >= 0x0B99) && (c <= 0x0B9A)) ||
        (c == 0x0B9C) ||
        ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
        ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
        ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
        ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
        ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
        ((c >= 0x0C05) && (c <= 0x0C0C)) ||
        ((c >= 0x0C0E) && (c <= 0x0C10)) ||
        ((c >= 0x0C12) && (c <= 0x0C28)) ||
        ((c >= 0x0C2A) && (c <= 0x0C33)) ||
        ((c >= 0x0C35) && (c <= 0x0C39)) ||
        ((c >= 0x0C60) && (c <= 0x0C61)) ||
        ((c >= 0x0C85) && (c <= 0x0C8C)) ||
        ((c >= 0x0C8E) && (c <= 0x0C90)) ||
        ((c >= 0x0C92) && (c <= 0x0CA8)) ||
        ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
        ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
        (c == 0x0CDE) ||
        ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
        ((c >= 0x0D05) && (c <= 0x0D0C)) ||
        ((c >= 0x0D0E) && (c <= 0x0D10)) ||
        ((c >= 0x0D12) && (c <= 0x0D28)) ||
        ((c >= 0x0D2A) && (c <= 0x0D39)) ||
        ((c >= 0x0D60) && (c <= 0x0D61)) ||
        ((c >= 0x0E01) && (c <= 0x0E2E)) ||
        (c == 0x0E30) ||
        ((c >= 0x0E32) && (c <= 0x0E33)) ||
        ((c >= 0x0E40) && (c <= 0x0E45)) ||
        ((c >= 0x0E81) && (c <= 0x0E82)) ||
        (c == 0x0E84) ||
        ((c >= 0x0E87) && (c <= 0x0E88)) ||
        (c == 0x0E8A) ||
        (c == 0x0E8D) ||
        ((c >= 0x0E94) && (c <= 0x0E97)) ||
        ((c >= 0x0E99) && (c <= 0x0E9F)) ||
        ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
        (c == 0x0EA5) ||
        (c == 0x0EA7) ||
        ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
        ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
        (c == 0x0EB0) ||
        ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
        (c == 0x0EBD) ||
        ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
        ((c >= 0x0F40) && (c <= 0x0F47)) ||
        ((c >= 0x0F49) && (c <= 0x0F69)))
        return(1);

    /* accelerator: nothing else matches below 0x10A0 */
    if (c < 0x10A0)
        return(0);

    /* tier 3: 0x10A0 - 0xD7A3 */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
653
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The ASCII digits are tested first; every other range starts at or
 * above 0x0660, so smaller values are rejected without further tests.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);

    /* accelerator: nothing else matches below 0x0660 */
    if (c < 0x0660)
        return(0);

    return(
        ((c >= 0x0660) && (c <= 0x0669)) ||
        ((c >= 0x06F0) && (c <= 0x06F9)) ||
        ((c >= 0x0966) && (c <= 0x096F)) ||
        ((c >= 0x09E6) && (c <= 0x09EF)) ||
        ((c >= 0x0A66) && (c <= 0x0A6F)) ||
        ((c >= 0x0AE6) && (c <= 0x0AEF)) ||
        ((c >= 0x0B66) && (c <= 0x0B6F)) ||
        ((c >= 0x0BE7) && (c <= 0x0BEF)) ||
        ((c >= 0x0C66) && (c <= 0x0C6F)) ||
        ((c >= 0x0CE6) && (c <= 0x0CEF)) ||
        ((c >= 0x0D66) && (c <= 0x0D6F)) ||
        ((c >= 0x0E50) && (c <= 0x0E59)) ||
        ((c >= 0x0ED0) && (c <= 0x0ED9)) ||
        ((c >= 0x0F20) && (c <= 0x0F29)));
}
683
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges are tested in four ascending tiers ("accelerators"): once
 * a tier has failed, a value below the first code point of the next tier
 * cannot match anything and is rejected without further comparisons.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* accelerator: no combining character below 0x0300 */
    if (c < 0x0300)
        return(0);

    /* tier 1: 0x0300 - 0x06ED */
    if (((c >= 0x0300) && (c <= 0x0345)) ||
        ((c >= 0x0360) && (c <= 0x0361)) ||
        ((c >= 0x0483) && (c <= 0x0486)) ||
        ((c >= 0x0591) && (c <= 0x05A1)) ||
        ((c >= 0x05A3) && (c <= 0x05B9)) ||
        ((c >= 0x05BB) && (c <= 0x05BD)) ||
        (c == 0x05BF) ||
        ((c >= 0x05C1) && (c <= 0x05C2)) ||
        (c == 0x05C4) ||
        ((c >= 0x064B) && (c <= 0x0652)) ||
        (c == 0x0670) ||
        ((c >= 0x06D6) && (c <= 0x06DC)) ||
        ((c >= 0x06DD) && (c <= 0x06DF)) ||
        ((c >= 0x06E0) && (c <= 0x06E4)) ||
        ((c >= 0x06E7) && (c <= 0x06E8)) ||
        ((c >= 0x06EA) && (c <= 0x06ED)))
        return(1);

    /* accelerator: nothing else matches below 0x0901 */
    if (c < 0x0901)
        return(0);

    /* tier 2: 0x0901 - 0x09E3 */
    if (((c >= 0x0901) && (c <= 0x0903)) ||
        (c == 0x093C) ||
        ((c >= 0x093E) && (c <= 0x094C)) ||
        (c == 0x094D) ||
        ((c >= 0x0951) && (c <= 0x0954)) ||
        ((c >= 0x0962) && (c <= 0x0963)) ||
        ((c >= 0x0981) && (c <= 0x0983)) ||
        (c == 0x09BC) ||
        (c == 0x09BE) ||
        (c == 0x09BF) ||
        ((c >= 0x09C0) && (c <= 0x09C4)) ||
        ((c >= 0x09C7) && (c <= 0x09C8)) ||
        ((c >= 0x09CB) && (c <= 0x09CD)) ||
        (c == 0x09D7) ||
        ((c >= 0x09E2) && (c <= 0x09E3)))
        return(1);

    /* accelerator: nothing else matches below 0x0A02 */
    if (c < 0x0A02)
        return(0);

    /* tier 3: 0x0A02 - 0x0D57 */
    if ((c == 0x0A02) ||
        (c == 0x0A3C) ||
        (c == 0x0A3E) ||
        (c == 0x0A3F) ||
        ((c >= 0x0A40) && (c <= 0x0A42)) ||
        ((c >= 0x0A47) && (c <= 0x0A48)) ||
        ((c >= 0x0A4B) && (c <= 0x0A4D)) ||
        ((c >= 0x0A70) && (c <= 0x0A71)) ||
        ((c >= 0x0A81) && (c <= 0x0A83)) ||
        (c == 0x0ABC) ||
        ((c >= 0x0ABE) && (c <= 0x0AC5)) ||
        ((c >= 0x0AC7) && (c <= 0x0AC9)) ||
        ((c >= 0x0ACB) && (c <= 0x0ACD)) ||
        ((c >= 0x0B01) && (c <= 0x0B03)) ||
        (c == 0x0B3C) ||
        ((c >= 0x0B3E) && (c <= 0x0B43)) ||
        ((c >= 0x0B47) && (c <= 0x0B48)) ||
        ((c >= 0x0B4B) && (c <= 0x0B4D)) ||
        ((c >= 0x0B56) && (c <= 0x0B57)) ||
        ((c >= 0x0B82) && (c <= 0x0B83)) ||
        ((c >= 0x0BBE) && (c <= 0x0BC2)) ||
        ((c >= 0x0BC6) && (c <= 0x0BC8)) ||
        ((c >= 0x0BCA) && (c <= 0x0BCD)) ||
        (c == 0x0BD7) ||
        ((c >= 0x0C01) && (c <= 0x0C03)) ||
        ((c >= 0x0C3E) && (c <= 0x0C44)) ||
        ((c >= 0x0C46) && (c <= 0x0C48)) ||
        ((c >= 0x0C4A) && (c <= 0x0C4D)) ||
        ((c >= 0x0C55) && (c <= 0x0C56)) ||
        ((c >= 0x0C82) && (c <= 0x0C83)) ||
        ((c >= 0x0CBE) && (c <= 0x0CC4)) ||
        ((c >= 0x0CC6) && (c <= 0x0CC8)) ||
        ((c >= 0x0CCA) && (c <= 0x0CCD)) ||
        ((c >= 0x0CD5) && (c <= 0x0CD6)) ||
        ((c >= 0x0D02) && (c <= 0x0D03)) ||
        ((c >= 0x0D3E) && (c <= 0x0D43)) ||
        ((c >= 0x0D46) && (c <= 0x0D48)) ||
        ((c >= 0x0D4A) && (c <= 0x0D4D)) ||
        (c == 0x0D57))
        return(1);

    /* accelerator: nothing else matches below 0x0E31 */
    if (c < 0x0E31)
        return(0);

    /* tier 4: 0x0E31 - 0x309A */
    return(
        (c == 0x0E31) ||
        ((c >= 0x0E34) && (c <= 0x0E3A)) ||
        ((c >= 0x0E47) && (c <= 0x0E4E)) ||
        (c == 0x0EB1) ||
        ((c >= 0x0EB4) && (c <= 0x0EB9)) ||
        ((c >= 0x0EBB) && (c <= 0x0EBC)) ||
        ((c >= 0x0EC8) && (c <= 0x0ECD)) ||
        ((c >= 0x0F18) && (c <= 0x0F19)) ||
        (c == 0x0F35) ||
        (c == 0x0F37) ||
        (c == 0x0F39) ||
        (c == 0x0F3E) ||
        (c == 0x0F3F) ||
        ((c >= 0x0F71) && (c <= 0x0F84)) ||
        ((c >= 0x0F86) && (c <= 0x0F8B)) ||
        ((c >= 0x0F90) && (c <= 0x0F95)) ||
        (c == 0x0F97) ||
        ((c >= 0x0F99) && (c <= 0x0FAD)) ||
        ((c >= 0x0FB1) && (c <= 0x0FB7)) ||
        (c == 0x0FB9) ||
        ((c >= 0x20D0) && (c <= 0x20DC)) ||
        (c == 0x20E1) ||
        ((c >= 0x302A) && (c <= 0x302F)) ||
        (c == 0x3099) ||
        (c == 0x309A));
}
796
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Test whether @c belongs to the XML 1.0 production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c == 0x309D) || (c == 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    /* the isolated code points */
    return((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
           (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
           (c == 0x0EC6) || (c == 0x3005));
}
821
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Test whether @c belongs to the XML 1.0 production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; in
 * particular the CJK compatibility range #xF900-#xFA2D is not part
 * of [86] and is rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast path: nothing below #x100 is an ideograph */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
839
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Test whether @c belongs to the XML 1.0 production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The two alternatives are evaluated through the IS_BASECHAR() and
 * IS_IDEOGRAPHIC() macros, in that order.
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  a Unicode code point (int)
 *
 * Test whether @c belongs to the XML 1.0 production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')))
        return(1);

    /* the white space characters and the punctuation set */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
888/* we need to keep enough input to show errors in context */
889#define LINE_LEN 80
890
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the input's base pointer no
 * longer matches the underlying buffer content, or when the current
 * position lies outside [base, base + use], then dumps the pointers
 * and counters of the input.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates the
     * value where pointers are wider than int. The remaining arguments
     * are cast so that each one matches its %d conversion.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
/* without DEBUG_INPUT the consistency check expands to nothing */
#define CHECK_BUFFER(in)
#endif
915
916
917/**
918 * xmlParserInputRead:
919 * @in: an XML parser input
920 * @len: an indicative size for the lookahead
921 *
922 * This function refresh the input for the parser. It doesn't try to
923 * preserve pointers to the input buffer, and discard already read data
924 *
925 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
926 * end of this entity
927 */
928int
929xmlParserInputRead(xmlParserInputPtr in, int len) {
930 int ret;
931 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000932 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000933
934#ifdef DEBUG_INPUT
935 xmlGenericError(xmlGenericErrorContext, "Read\n");
936#endif
937 if (in->buf == NULL) return(-1);
938 if (in->base == NULL) return(-1);
939 if (in->cur == NULL) return(-1);
940 if (in->buf->buffer == NULL) return(-1);
941 if (in->buf->readcallback == NULL) return(-1);
942
943 CHECK_BUFFER(in);
944
945 used = in->cur - in->buf->buffer->content;
946 ret = xmlBufferShrink(in->buf->buffer, used);
947 if (ret > 0) {
948 in->cur -= ret;
949 in->consumed += ret;
950 }
951 ret = xmlParserInputBufferRead(in->buf, len);
952 if (in->base != in->buf->buffer->content) {
953 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000954 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000955 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000956 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000957 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000959 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000960 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000961
962 CHECK_BUFFER(in);
963
964 return(ret);
965}
966
967/**
968 * xmlParserInputGrow:
969 * @in: an XML parser input
970 * @len: an indicative size for the lookahead
971 *
972 * This function increase the input for the parser. It tries to
973 * preserve pointers to the input buffer, and keep already read data
974 *
975 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
976 * end of this entity
977 */
978int
979xmlParserInputGrow(xmlParserInputPtr in, int len) {
980 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000981 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000982
983#ifdef DEBUG_INPUT
984 xmlGenericError(xmlGenericErrorContext, "Grow\n");
985#endif
986 if (in->buf == NULL) return(-1);
987 if (in->base == NULL) return(-1);
988 if (in->cur == NULL) return(-1);
989 if (in->buf->buffer == NULL) return(-1);
990
991 CHECK_BUFFER(in);
992
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000993 indx = in->cur - in->base;
994 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000995
996 CHECK_BUFFER(in);
997
998 return(0);
999 }
1000 if (in->buf->readcallback != NULL)
1001 ret = xmlParserInputBufferGrow(in->buf, len);
1002 else
1003 return(0);
1004
1005 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001006 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001007 * block, but we use it really as an integer to do some
1008 * pointer arithmetic. Insure will raise it as a bug but in
1009 * that specific case, that's not !
1010 */
1011 if (in->base != in->buf->buffer->content) {
1012 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001013 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001014 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001015 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001016 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001018 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001019 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001020
1021 CHECK_BUFFER(in);
1022
1023 return(ret);
1024}
1025
1026/**
1027 * xmlParserInputShrink:
1028 * @in: an XML parser input
1029 *
1030 * This function removes used input for the parser.
1031 */
1032void
1033xmlParserInputShrink(xmlParserInputPtr in) {
1034 int used;
1035 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001036 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001037
1038#ifdef DEBUG_INPUT
1039 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1040#endif
1041 if (in->buf == NULL) return;
1042 if (in->base == NULL) return;
1043 if (in->cur == NULL) return;
1044 if (in->buf->buffer == NULL) return;
1045
1046 CHECK_BUFFER(in);
1047
1048 used = in->cur - in->buf->buffer->content;
1049 /*
1050 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001051 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001052 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001053 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001054 return;
1055 if (used > INPUT_CHUNK) {
1056 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1057 if (ret > 0) {
1058 in->cur -= ret;
1059 in->consumed += ret;
1060 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001061 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001062 }
1063
1064 CHECK_BUFFER(in);
1065
1066 if (in->buf->buffer->use > INPUT_CHUNK) {
1067 return;
1068 }
1069 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1070 if (in->base != in->buf->buffer->content) {
1071 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001072 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001073 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001074 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001075 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001076 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001077 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001078 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001079
1080 CHECK_BUFFER(in);
1081}
1082
1083/************************************************************************
1084 * *
1085 * UTF8 character input and related functions *
1086 * *
1087 ************************************************************************/
1088
1089/**
1090 * xmlNextChar:
1091 * @ctxt: the XML parser context
1092 *
1093 * Skip to the next char input char.
1094 */
1095
1096void
1097xmlNextChar(xmlParserCtxtPtr ctxt) {
1098 if (ctxt->instate == XML_PARSER_EOF)
1099 return;
1100
1101 /*
1102 * 2.11 End-of-Line Handling
1103 * the literal two-character sequence "#xD#xA" or a standalone
1104 * literal #xD, an XML processor must pass to the application
1105 * the single character #xA.
1106 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001107 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001108 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001109 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1110 (ctxt->instate != XML_PARSER_COMMENT)) {
1111 /*
1112 * If we are at the end of the current entity and
1113 * the context allows it, we pop consumed entities
1114 * automatically.
1115 * the auto closing should be blocked in other cases
1116 */
1117 xmlPopInput(ctxt);
1118 } else {
1119 if (*(ctxt->input->cur) == '\n') {
1120 ctxt->input->line++; ctxt->input->col = 1;
1121 } else ctxt->input->col++;
1122 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1123 /*
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1126 *
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131 *
1132 * Check for the 0x110000 limit too
1133 */
1134 const unsigned char *cur = ctxt->input->cur;
1135 unsigned char c;
1136
1137 c = *cur;
1138 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001139 if (cur[1] == 0)
1140 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1141 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001142 goto encoding_error;
1143 if ((c & 0xe0) == 0xe0) {
1144 unsigned int val;
1145
Daniel Veillard561b7f82002-03-20 21:55:57 +00001146 if (cur[2] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001149 goto encoding_error;
1150 if ((c & 0xf0) == 0xf0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001151 if (cur[3] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001154 ((cur[3] & 0xc0) != 0x80))
1155 goto encoding_error;
1156 /* 4-byte code */
1157 ctxt->input->cur += 4;
1158 val = (cur[0] & 0x7) << 18;
1159 val |= (cur[1] & 0x3f) << 12;
1160 val |= (cur[2] & 0x3f) << 6;
1161 val |= cur[3] & 0x3f;
1162 } else {
1163 /* 3-byte code */
1164 ctxt->input->cur += 3;
1165 val = (cur[0] & 0xf) << 12;
1166 val |= (cur[1] & 0x3f) << 6;
1167 val |= cur[2] & 0x3f;
1168 }
1169 if (((val > 0xd7ff) && (val < 0xe000)) ||
1170 ((val > 0xfffd) && (val < 0x10000)) ||
1171 (val >= 0x110000)) {
1172 if ((ctxt->sax != NULL) &&
1173 (ctxt->sax->error != NULL))
1174 ctxt->sax->error(ctxt->userData,
1175 "Char 0x%X out of allowed range\n", val);
1176 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1177 ctxt->wellFormed = 0;
1178 ctxt->disableSAX = 1;
1179 }
1180 } else
1181 /* 2-byte code */
1182 ctxt->input->cur += 2;
1183 } else
1184 /* 1-byte code */
1185 ctxt->input->cur++;
1186 } else {
1187 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001188 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001189 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001190 * XML constructs only use < 128 chars
1191 */
1192 ctxt->input->cur++;
1193 }
1194 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001197 }
1198 } else {
1199 ctxt->input->cur++;
1200 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001201 if (*ctxt->input->cur == 0)
Owen Taylor3473f882001-02-23 17:55:21 +00001202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001204 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Owen Taylor3473f882001-02-23 17:55:21 +00001205 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001206 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1208 xmlPopInput(ctxt);
1209 return;
1210encoding_error:
1211 /*
1212 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001213 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001214 * declaration header. Report the error and switch the encoding
1215 * to ISO-Latin-1 (if you don't like this policy, just declare the
1216 * encoding !)
1217 */
1218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1219 ctxt->sax->error(ctxt->userData,
1220 "Input is not proper UTF-8, indicate encoding !\n");
1221 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001222 ctxt->input->cur[0], ctxt->input->cur[1],
1223 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001224 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001225 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001226 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1227
1228 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001229 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001230 return;
1231}
1232
1233/**
1234 * xmlCurrentChar:
1235 * @ctxt: the XML parser context
1236 * @len: pointer to the length of the char read
1237 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001238 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001239 * bytes in the input buffer. Implement the end of line normalization:
1240 * 2.11 End-of-Line Handling
1241 * Wherever an external parsed entity or the literal entity value
1242 * of an internal parsed entity contains either the literal two-character
1243 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1244 * must pass to the application the single character #xA.
1245 * This behavior can conveniently be produced by normalizing all
1246 * line breaks to #xA on input, before parsing.)
1247 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001248 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001249 */
1250
1251int
1252xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1253 if (ctxt->instate == XML_PARSER_EOF)
1254 return(0);
1255
Daniel Veillard561b7f82002-03-20 21:55:57 +00001256 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1257 *len = 1;
1258 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1261 /*
1262 * We are supposed to handle UTF8, check it's valid
1263 * From rfc2044: encoding of the Unicode values on UTF-8:
1264 *
1265 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1266 * 0000 0000-0000 007F 0xxxxxxx
1267 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1268 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1269 *
1270 * Check for the 0x110000 limit too
1271 */
1272 const unsigned char *cur = ctxt->input->cur;
1273 unsigned char c;
1274 unsigned int val;
1275
1276 c = *cur;
1277 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001278 if (cur[1] == 0)
1279 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1280 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001281 goto encoding_error;
1282 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001283
1284 if (cur[2] == 0)
1285 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1286 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001287 goto encoding_error;
1288 if ((c & 0xf0) == 0xf0) {
1289 if (cur[3] == 0)
1290 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001291 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001292 ((cur[3] & 0xc0) != 0x80))
1293 goto encoding_error;
1294 /* 4-byte code */
1295 *len = 4;
1296 val = (cur[0] & 0x7) << 18;
1297 val |= (cur[1] & 0x3f) << 12;
1298 val |= (cur[2] & 0x3f) << 6;
1299 val |= cur[3] & 0x3f;
1300 } else {
1301 /* 3-byte code */
1302 *len = 3;
1303 val = (cur[0] & 0xf) << 12;
1304 val |= (cur[1] & 0x3f) << 6;
1305 val |= cur[2] & 0x3f;
1306 }
1307 } else {
1308 /* 2-byte code */
1309 *len = 2;
1310 val = (cur[0] & 0x1f) << 6;
1311 val |= cur[1] & 0x3f;
1312 }
1313 if (!IS_CHAR(val)) {
1314 if ((ctxt->sax != NULL) &&
1315 (ctxt->sax->error != NULL))
1316 ctxt->sax->error(ctxt->userData,
1317 "Char 0x%X out of allowed range\n", val);
1318 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1319 ctxt->wellFormed = 0;
1320 ctxt->disableSAX = 1;
1321 }
1322 return(val);
1323 } else {
1324 /* 1-byte code */
1325 *len = 1;
1326 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001327 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001328 ctxt->nbChars++;
1329 ctxt->input->cur++;
1330 }
1331 return(0xA);
1332 }
1333 return((int) *ctxt->input->cur);
1334 }
1335 }
1336 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001337 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001338 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001339 * XML constructs only use < 128 chars
1340 */
1341 *len = 1;
1342 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001343 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001344 ctxt->nbChars++;
1345 ctxt->input->cur++;
1346 }
1347 return(0xA);
1348 }
1349 return((int) *ctxt->input->cur);
1350encoding_error:
1351 /*
1352 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001353 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001354 * declaration header. Report the error and switch the encoding
1355 * to ISO-Latin-1 (if you don't like this policy, just declare the
1356 * encoding !)
1357 */
1358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1359 ctxt->sax->error(ctxt->userData,
1360 "Input is not proper UTF-8, indicate encoding !\n");
1361 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001362 ctxt->input->cur[0], ctxt->input->cur[1],
1363 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001364 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001365 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001366 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1367
1368 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1369 *len = 1;
1370 return((int) *ctxt->input->cur);
1371}
1372
1373/**
1374 * xmlStringCurrentChar:
1375 * @ctxt: the XML parser context
1376 * @cur: pointer to the beginning of the char
1377 * @len: pointer to the length of the char read
1378 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001379 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001380 * bytes in the input buffer.
1381 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001382 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001383 */
1384
1385int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001386xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1387{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001388 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001389 /*
1390 * We are supposed to handle UTF8, check it's valid
1391 * From rfc2044: encoding of the Unicode values on UTF-8:
1392 *
1393 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1394 * 0000 0000-0000 007F 0xxxxxxx
1395 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1396 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1397 *
1398 * Check for the 0x110000 limit too
1399 */
1400 unsigned char c;
1401 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001402
Daniel Veillardd8224e02002-01-13 15:43:22 +00001403 c = *cur;
1404 if (c & 0x80) {
1405 if ((cur[1] & 0xc0) != 0x80)
1406 goto encoding_error;
1407 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001408
Daniel Veillardd8224e02002-01-13 15:43:22 +00001409 if ((cur[2] & 0xc0) != 0x80)
1410 goto encoding_error;
1411 if ((c & 0xf0) == 0xf0) {
1412 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1413 goto encoding_error;
1414 /* 4-byte code */
1415 *len = 4;
1416 val = (cur[0] & 0x7) << 18;
1417 val |= (cur[1] & 0x3f) << 12;
1418 val |= (cur[2] & 0x3f) << 6;
1419 val |= cur[3] & 0x3f;
1420 } else {
1421 /* 3-byte code */
1422 *len = 3;
1423 val = (cur[0] & 0xf) << 12;
1424 val |= (cur[1] & 0x3f) << 6;
1425 val |= cur[2] & 0x3f;
1426 }
1427 } else {
1428 /* 2-byte code */
1429 *len = 2;
1430 val = (cur[0] & 0x1f) << 6;
1431 val |= cur[1] & 0x3f;
1432 }
1433 if (!IS_CHAR(val)) {
1434 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1435 (ctxt->sax->error != NULL))
1436 ctxt->sax->error(ctxt->userData,
1437 "Char 0x%X out of allowed range\n",
1438 val);
1439 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1440 ctxt->wellFormed = 0;
1441 ctxt->disableSAX = 1;
1442 }
1443 return (val);
1444 } else {
1445 /* 1-byte code */
1446 *len = 1;
1447 return ((int) *cur);
1448 }
Owen Taylor3473f882001-02-23 17:55:21 +00001449 }
1450 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001451 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001452 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001453 * XML constructs only use < 128 chars
1454 */
1455 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001456 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001457encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001458
Owen Taylor3473f882001-02-23 17:55:21 +00001459 /*
1460 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001461 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001462 * declaration header. Report the error and switch the encoding
1463 * to ISO-Latin-1 (if you don't like this policy, just declare the
1464 * encoding !)
1465 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001466 if (ctxt != NULL) {
1467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1468 ctxt->sax->error(ctxt->userData,
1469 "Input is not proper UTF-8, indicate encoding !\n");
1470 ctxt->sax->error(ctxt->userData,
1471 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1472 ctxt->input->cur[0], ctxt->input->cur[1],
1473 ctxt->input->cur[2], ctxt->input->cur[3]);
1474 }
1475 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001476 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001477 }
Owen Taylor3473f882001-02-23 17:55:21 +00001478
1479 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001480 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001481}
1482
1483/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001484 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001485 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001486 * @val: the char value
1487 *
1488 * append the char value in the array
1489 *
1490 * Returns the number of xmlChar written
1491 */
Owen Taylor3473f882001-02-23 17:55:21 +00001492int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001493xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001494 /*
1495 * We are supposed to handle UTF8, check it's valid
1496 * From rfc2044: encoding of the Unicode values on UTF-8:
1497 *
1498 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1499 * 0000 0000-0000 007F 0xxxxxxx
1500 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1501 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1502 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001503 if (val >= 0x80) {
1504 xmlChar *savedout = out;
1505 int bits;
1506 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1507 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1508 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1509 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001510 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001511 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001512 val);
1513 return(0);
1514 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001515 for ( ; bits >= 0; bits-= 6)
1516 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1517 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 }
1519 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001520 return 1;
1521}
1522
1523/**
1524 * xmlCopyChar:
1525 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001526 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001527 * @val: the char value
1528 *
1529 * append the char value in the array
1530 *
1531 * Returns the number of xmlChar written
1532 */
1533
1534int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001535xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001536 /* the len parameter is ignored */
1537 if (val >= 0x80) {
1538 return(xmlCopyCharMultiByte (out, val));
1539 }
1540 *out = (xmlChar) val;
1541 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001542}
1543
1544/************************************************************************
1545 * *
1546 * Commodity functions to switch encodings *
1547 * *
1548 ************************************************************************/
1549
1550/**
1551 * xmlSwitchEncoding:
1552 * @ctxt: the parser context
1553 * @enc: the encoding value (number)
1554 *
1555 * change the input functions when discovering the character encoding
1556 * of a given entity.
1557 *
1558 * Returns 0 in case of success, -1 otherwise
1559 */
1560int
1561xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1562{
1563 xmlCharEncodingHandlerPtr handler;
1564
1565 switch (enc) {
1566 case XML_CHAR_ENCODING_ERROR:
1567 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1569 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1570 ctxt->wellFormed = 0;
1571 ctxt->disableSAX = 1;
1572 break;
1573 case XML_CHAR_ENCODING_NONE:
1574 /* let's assume it's UTF-8 without the XML decl */
1575 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1576 return(0);
1577 case XML_CHAR_ENCODING_UTF8:
1578 /* default encoding, no conversion should be needed */
1579 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001580
1581 /*
1582 * Errata on XML-1.0 June 20 2001
1583 * Specific handling of the Byte Order Mark for
1584 * UTF-8
1585 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001586 if ((ctxt->input != NULL) &&
1587 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001588 (ctxt->input->cur[1] == 0xBB) &&
1589 (ctxt->input->cur[2] == 0xBF)) {
1590 ctxt->input->cur += 3;
1591 }
Owen Taylor3473f882001-02-23 17:55:21 +00001592 return(0);
1593 default:
1594 break;
1595 }
1596 handler = xmlGetCharEncodingHandler(enc);
1597 if (handler == NULL) {
1598 /*
1599 * Default handlers.
1600 */
1601 switch (enc) {
1602 case XML_CHAR_ENCODING_ERROR:
1603 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1606 ctxt->wellFormed = 0;
1607 ctxt->disableSAX = 1;
1608 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1609 break;
1610 case XML_CHAR_ENCODING_NONE:
1611 /* let's assume it's UTF-8 without the XML decl */
1612 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1613 return(0);
1614 case XML_CHAR_ENCODING_UTF8:
1615 case XML_CHAR_ENCODING_ASCII:
1616 /* default encoding, no conversion should be needed */
1617 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1618 return(0);
1619 case XML_CHAR_ENCODING_UTF16LE:
1620 break;
1621 case XML_CHAR_ENCODING_UTF16BE:
1622 break;
1623 case XML_CHAR_ENCODING_UCS4LE:
1624 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626 ctxt->sax->error(ctxt->userData,
1627 "char encoding USC4 little endian not supported\n");
1628 break;
1629 case XML_CHAR_ENCODING_UCS4BE:
1630 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1632 ctxt->sax->error(ctxt->userData,
1633 "char encoding USC4 big endian not supported\n");
1634 break;
1635 case XML_CHAR_ENCODING_EBCDIC:
1636 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638 ctxt->sax->error(ctxt->userData,
1639 "char encoding EBCDIC not supported\n");
1640 break;
1641 case XML_CHAR_ENCODING_UCS4_2143:
1642 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "char encoding UCS4 2143 not supported\n");
1646 break;
1647 case XML_CHAR_ENCODING_UCS4_3412:
1648 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1650 ctxt->sax->error(ctxt->userData,
1651 "char encoding UCS4 3412 not supported\n");
1652 break;
1653 case XML_CHAR_ENCODING_UCS2:
1654 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "char encoding UCS2 not supported\n");
1658 break;
1659 case XML_CHAR_ENCODING_8859_1:
1660 case XML_CHAR_ENCODING_8859_2:
1661 case XML_CHAR_ENCODING_8859_3:
1662 case XML_CHAR_ENCODING_8859_4:
1663 case XML_CHAR_ENCODING_8859_5:
1664 case XML_CHAR_ENCODING_8859_6:
1665 case XML_CHAR_ENCODING_8859_7:
1666 case XML_CHAR_ENCODING_8859_8:
1667 case XML_CHAR_ENCODING_8859_9:
1668 /*
1669 * We used to keep the internal content in the
1670 * document encoding however this turns being unmaintainable
1671 * So xmlGetCharEncodingHandler() will return non-null
1672 * values for this now.
1673 */
1674 if ((ctxt->inputNr == 1) &&
1675 (ctxt->encoding == NULL) &&
1676 (ctxt->input->encoding != NULL)) {
1677 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1678 }
1679 ctxt->charset = enc;
1680 return(0);
1681 case XML_CHAR_ENCODING_2022_JP:
1682 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1684 ctxt->sax->error(ctxt->userData,
1685 "char encoding ISO-2022-JPnot supported\n");
1686 break;
1687 case XML_CHAR_ENCODING_SHIFT_JIS:
1688 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
1691 "char encoding Shift_JIS not supported\n");
1692 break;
1693 case XML_CHAR_ENCODING_EUC_JP:
1694 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696 ctxt->sax->error(ctxt->userData,
1697 "char encoding EUC-JPnot supported\n");
1698 break;
1699 }
1700 }
1701 if (handler == NULL)
1702 return(-1);
1703 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1704 return(xmlSwitchToEncoding(ctxt, handler));
1705}
1706
1707/**
1708 * xmlSwitchToEncoding:
1709 * @ctxt: the parser context
1710 * @handler: the encoding handler
1711 *
1712 * change the input functions when discovering the character encoding
1713 * of a given entity.
1714 *
1715 * Returns 0 in case of success, -1 otherwise
1716 */
1717int
1718xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1719{
1720 int nbchars;
1721
1722 if (handler != NULL) {
1723 if (ctxt->input != NULL) {
1724 if (ctxt->input->buf != NULL) {
1725 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001726 /*
1727 * Check in case the auto encoding detetection triggered
1728 * in already.
1729 */
Owen Taylor3473f882001-02-23 17:55:21 +00001730 if (ctxt->input->buf->encoder == handler)
1731 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001732
1733 /*
1734 * "UTF-16" can be used for both LE and BE
1735 */
1736 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1737 BAD_CAST "UTF-16", 6)) &&
1738 (!xmlStrncmp(BAD_CAST handler->name,
1739 BAD_CAST "UTF-16", 6))) {
1740 return(0);
1741 }
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 /*
1744 * Note: this is a bit dangerous, but that's what it
1745 * takes to use nearly compatible signature for different
1746 * encodings.
1747 */
1748 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1749 ctxt->input->buf->encoder = handler;
1750 return(0);
1751 }
1752 ctxt->input->buf->encoder = handler;
1753
1754 /*
1755 * Is there already some content down the pipe to convert ?
1756 */
1757 if ((ctxt->input->buf->buffer != NULL) &&
1758 (ctxt->input->buf->buffer->use > 0)) {
1759 int processed;
1760
1761 /*
1762 * Specific handling of the Byte Order Mark for
1763 * UTF-16
1764 */
1765 if ((handler->name != NULL) &&
1766 (!strcmp(handler->name, "UTF-16LE")) &&
1767 (ctxt->input->cur[0] == 0xFF) &&
1768 (ctxt->input->cur[1] == 0xFE)) {
1769 ctxt->input->cur += 2;
1770 }
1771 if ((handler->name != NULL) &&
1772 (!strcmp(handler->name, "UTF-16BE")) &&
1773 (ctxt->input->cur[0] == 0xFE) &&
1774 (ctxt->input->cur[1] == 0xFF)) {
1775 ctxt->input->cur += 2;
1776 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001777 /*
1778 * Errata on XML-1.0 June 20 2001
1779 * Specific handling of the Byte Order Mark for
1780 * UTF-8
1781 */
1782 if ((handler->name != NULL) &&
1783 (!strcmp(handler->name, "UTF-8")) &&
1784 (ctxt->input->cur[0] == 0xEF) &&
1785 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001786 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001787 ctxt->input->cur += 3;
1788 }
Owen Taylor3473f882001-02-23 17:55:21 +00001789
1790 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001791 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001792 * Move it as the raw buffer and create a new input buffer
1793 */
1794 processed = ctxt->input->cur - ctxt->input->base;
1795 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1796 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1797 ctxt->input->buf->buffer = xmlBufferCreate();
1798
1799 if (ctxt->html) {
1800 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001801 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001802 */
1803 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1804 ctxt->input->buf->buffer,
1805 ctxt->input->buf->raw);
1806 } else {
1807 /*
1808 * convert just enough to get
1809 * '<?xml version="1.0" encoding="xxx"?>'
1810 * parsed with the autodetected encoding
1811 * into the parser reading buffer.
1812 */
1813 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1814 ctxt->input->buf->buffer,
1815 ctxt->input->buf->raw);
1816 }
1817 if (nbchars < 0) {
1818 xmlGenericError(xmlGenericErrorContext,
1819 "xmlSwitchToEncoding: encoder error\n");
1820 return(-1);
1821 }
1822 ctxt->input->base =
1823 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001824 ctxt->input->end =
1825 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 }
1828 return(0);
1829 } else {
1830 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1831 /*
1832 * When parsing a static memory array one must know the
1833 * size to be able to convert the buffer.
1834 */
1835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1836 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001837 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001838 return(-1);
1839 } else {
1840 int processed;
1841
1842 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001843 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001844 * Move it as the raw buffer and create a new input buffer
1845 */
1846 processed = ctxt->input->cur - ctxt->input->base;
1847
1848 ctxt->input->buf->raw = xmlBufferCreate();
1849 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1850 ctxt->input->length - processed);
1851 ctxt->input->buf->buffer = xmlBufferCreate();
1852
1853 /*
1854 * convert as much as possible of the raw input
1855 * to the parser reading buffer.
1856 */
1857 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1858 ctxt->input->buf->buffer,
1859 ctxt->input->buf->raw);
1860 if (nbchars < 0) {
1861 xmlGenericError(xmlGenericErrorContext,
1862 "xmlSwitchToEncoding: encoder error\n");
1863 return(-1);
1864 }
1865
1866 /*
1867 * Conversion succeeded, get rid of the old buffer
1868 */
1869 if ((ctxt->input->free != NULL) &&
1870 (ctxt->input->base != NULL))
1871 ctxt->input->free((xmlChar *) ctxt->input->base);
1872 ctxt->input->base =
1873 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001874 ctxt->input->end =
1875 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001876 }
1877 }
1878 } else {
1879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1880 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001881 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001882 return(-1);
1883 }
1884 /*
1885 * The parsing is now done in UTF8 natively
1886 */
1887 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1888 } else
1889 return(-1);
1890 return(0);
1891
1892}
1893
1894/************************************************************************
1895 * *
1896 * Commodity functions to handle entities processing *
1897 * *
1898 ************************************************************************/
1899
1900/**
1901 * xmlFreeInputStream:
1902 * @input: an xmlParserInputPtr
1903 *
1904 * Free up an input stream.
1905 */
1906void
1907xmlFreeInputStream(xmlParserInputPtr input) {
1908 if (input == NULL) return;
1909
1910 if (input->filename != NULL) xmlFree((char *) input->filename);
1911 if (input->directory != NULL) xmlFree((char *) input->directory);
1912 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1913 if (input->version != NULL) xmlFree((char *) input->version);
1914 if ((input->free != NULL) && (input->base != NULL))
1915 input->free((xmlChar *) input->base);
1916 if (input->buf != NULL)
1917 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001918 xmlFree(input);
1919}
1920
1921/**
1922 * xmlNewInputStream:
1923 * @ctxt: an XML parser context
1924 *
1925 * Create a new input stream structure
1926 * Returns the new input stream or NULL
1927 */
1928xmlParserInputPtr
1929xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1930 xmlParserInputPtr input;
1931
1932 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1933 if (input == NULL) {
1934 if (ctxt != NULL) {
1935 ctxt->errNo = XML_ERR_NO_MEMORY;
1936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1937 ctxt->sax->error(ctxt->userData,
1938 "malloc: couldn't allocate a new input stream\n");
1939 ctxt->errNo = XML_ERR_NO_MEMORY;
1940 }
1941 return(NULL);
1942 }
1943 memset(input, 0, sizeof(xmlParserInput));
1944 input->line = 1;
1945 input->col = 1;
1946 input->standalone = -1;
1947 return(input);
1948}
1949
1950/**
1951 * xmlNewIOInputStream:
1952 * @ctxt: an XML parser context
1953 * @input: an I/O Input
1954 * @enc: the charset encoding if known
1955 *
1956 * Create a new input stream structure encapsulating the @input into
1957 * a stream suitable for the parser.
1958 *
1959 * Returns the new input stream or NULL
1960 */
1961xmlParserInputPtr
1962xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1963 xmlCharEncoding enc) {
1964 xmlParserInputPtr inputStream;
1965
1966 if (xmlParserDebugEntities)
1967 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1968 inputStream = xmlNewInputStream(ctxt);
1969 if (inputStream == NULL) {
1970 return(NULL);
1971 }
1972 inputStream->filename = NULL;
1973 inputStream->buf = input;
1974 inputStream->base = inputStream->buf->buffer->content;
1975 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001976 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001977 if (enc != XML_CHAR_ENCODING_NONE) {
1978 xmlSwitchEncoding(ctxt, enc);
1979 }
1980
1981 return(inputStream);
1982}
1983
1984/**
1985 * xmlNewEntityInputStream:
1986 * @ctxt: an XML parser context
1987 * @entity: an Entity pointer
1988 *
1989 * Create a new input stream based on an xmlEntityPtr
1990 *
1991 * Returns the new input stream or NULL
1992 */
1993xmlParserInputPtr
1994xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1995 xmlParserInputPtr input;
1996
1997 if (entity == NULL) {
1998 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2000 ctxt->sax->error(ctxt->userData,
2001 "internal: xmlNewEntityInputStream entity = NULL\n");
2002 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2003 return(NULL);
2004 }
2005 if (xmlParserDebugEntities)
2006 xmlGenericError(xmlGenericErrorContext,
2007 "new input from entity: %s\n", entity->name);
2008 if (entity->content == NULL) {
2009 switch (entity->etype) {
2010 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2011 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
2014 "xmlNewEntityInputStream unparsed entity !\n");
2015 break;
2016 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2017 case XML_EXTERNAL_PARAMETER_ENTITY:
2018 return(xmlLoadExternalEntity((char *) entity->URI,
2019 (char *) entity->ExternalID, ctxt));
2020 case XML_INTERNAL_GENERAL_ENTITY:
2021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2022 ctxt->sax->error(ctxt->userData,
2023 "Internal entity %s without content !\n", entity->name);
2024 break;
2025 case XML_INTERNAL_PARAMETER_ENTITY:
2026 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2028 ctxt->sax->error(ctxt->userData,
2029 "Internal parameter entity %s without content !\n", entity->name);
2030 break;
2031 case XML_INTERNAL_PREDEFINED_ENTITY:
2032 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2034 ctxt->sax->error(ctxt->userData,
2035 "Predefined entity %s without content !\n", entity->name);
2036 break;
2037 }
2038 return(NULL);
2039 }
2040 input = xmlNewInputStream(ctxt);
2041 if (input == NULL) {
2042 return(NULL);
2043 }
2044 input->filename = (char *) entity->URI;
2045 input->base = entity->content;
2046 input->cur = entity->content;
2047 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002048 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002049 return(input);
2050}
2051
2052/**
2053 * xmlNewStringInputStream:
2054 * @ctxt: an XML parser context
2055 * @buffer: an memory buffer
2056 *
2057 * Create a new input stream based on a memory buffer.
2058 * Returns the new input stream
2059 */
2060xmlParserInputPtr
2061xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2062 xmlParserInputPtr input;
2063
2064 if (buffer == NULL) {
2065 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2067 ctxt->sax->error(ctxt->userData,
2068 "internal: xmlNewStringInputStream string = NULL\n");
2069 return(NULL);
2070 }
2071 if (xmlParserDebugEntities)
2072 xmlGenericError(xmlGenericErrorContext,
2073 "new fixed input: %.30s\n", buffer);
2074 input = xmlNewInputStream(ctxt);
2075 if (input == NULL) {
2076 return(NULL);
2077 }
2078 input->base = buffer;
2079 input->cur = buffer;
2080 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002081 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002082 return(input);
2083}
2084
2085/**
2086 * xmlNewInputFromFile:
2087 * @ctxt: an XML parser context
2088 * @filename: the filename to use as entity
2089 *
2090 * Create a new input stream based on a file.
2091 *
2092 * Returns the new input stream or NULL in case of error
2093 */
2094xmlParserInputPtr
2095xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2096 xmlParserInputBufferPtr buf;
2097 xmlParserInputPtr inputStream;
2098 char *directory = NULL;
2099 xmlChar *URI = NULL;
2100
2101 if (xmlParserDebugEntities)
2102 xmlGenericError(xmlGenericErrorContext,
2103 "new input from file: %s\n", filename);
2104 if (ctxt == NULL) return(NULL);
2105 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2106 if (buf == NULL)
2107 return(NULL);
2108
2109 URI = xmlStrdup((xmlChar *) filename);
2110 directory = xmlParserGetDirectory((const char *) URI);
2111
2112 inputStream = xmlNewInputStream(ctxt);
2113 if (inputStream == NULL) {
2114 if (directory != NULL) xmlFree((char *) directory);
2115 if (URI != NULL) xmlFree((char *) URI);
2116 return(NULL);
2117 }
2118
2119 inputStream->filename = (const char *) URI;
2120 inputStream->directory = directory;
2121 inputStream->buf = buf;
2122
2123 inputStream->base = inputStream->buf->buffer->content;
2124 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002125 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002126 if ((ctxt->directory == NULL) && (directory != NULL))
2127 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2128 return(inputStream);
2129}
2130
2131/************************************************************************
2132 * *
2133 * Commodity functions to handle parser contexts *
2134 * *
2135 ************************************************************************/
2136
2137/**
2138 * xmlInitParserCtxt:
2139 * @ctxt: an XML parser context
2140 *
2141 * Initialize a parser context
2142 */
2143
2144void
2145xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2146{
2147 xmlSAXHandler *sax;
2148
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002149 if(ctxt==NULL) {
2150 xmlGenericError(xmlGenericErrorContext,
2151 "xmlInitParserCtxt: NULL context given\n");
2152 return;
2153 }
2154
Owen Taylor3473f882001-02-23 17:55:21 +00002155 xmlDefaultSAXHandlerInit();
2156
2157 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2158 if (sax == NULL) {
2159 xmlGenericError(xmlGenericErrorContext,
2160 "xmlInitParserCtxt: out of memory\n");
2161 }
2162 else
2163 memset(sax, 0, sizeof(xmlSAXHandler));
2164
2165 /* Allocate the Input stack */
2166 ctxt->inputTab = (xmlParserInputPtr *)
2167 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2168 if (ctxt->inputTab == NULL) {
2169 xmlGenericError(xmlGenericErrorContext,
2170 "xmlInitParserCtxt: out of memory\n");
2171 ctxt->inputNr = 0;
2172 ctxt->inputMax = 0;
2173 ctxt->input = NULL;
2174 return;
2175 }
2176 ctxt->inputNr = 0;
2177 ctxt->inputMax = 5;
2178 ctxt->input = NULL;
2179
2180 ctxt->version = NULL;
2181 ctxt->encoding = NULL;
2182 ctxt->standalone = -1;
2183 ctxt->hasExternalSubset = 0;
2184 ctxt->hasPErefs = 0;
2185 ctxt->html = 0;
2186 ctxt->external = 0;
2187 ctxt->instate = XML_PARSER_START;
2188 ctxt->token = 0;
2189 ctxt->directory = NULL;
2190
2191 /* Allocate the Node stack */
2192 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2193 if (ctxt->nodeTab == NULL) {
2194 xmlGenericError(xmlGenericErrorContext,
2195 "xmlInitParserCtxt: out of memory\n");
2196 ctxt->nodeNr = 0;
2197 ctxt->nodeMax = 0;
2198 ctxt->node = NULL;
2199 ctxt->inputNr = 0;
2200 ctxt->inputMax = 0;
2201 ctxt->input = NULL;
2202 return;
2203 }
2204 ctxt->nodeNr = 0;
2205 ctxt->nodeMax = 10;
2206 ctxt->node = NULL;
2207
2208 /* Allocate the Name stack */
2209 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2210 if (ctxt->nameTab == NULL) {
2211 xmlGenericError(xmlGenericErrorContext,
2212 "xmlInitParserCtxt: out of memory\n");
2213 ctxt->nodeNr = 0;
2214 ctxt->nodeMax = 0;
2215 ctxt->node = NULL;
2216 ctxt->inputNr = 0;
2217 ctxt->inputMax = 0;
2218 ctxt->input = NULL;
2219 ctxt->nameNr = 0;
2220 ctxt->nameMax = 0;
2221 ctxt->name = NULL;
2222 return;
2223 }
2224 ctxt->nameNr = 0;
2225 ctxt->nameMax = 10;
2226 ctxt->name = NULL;
2227
2228 /* Allocate the space stack */
2229 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2230 if (ctxt->spaceTab == NULL) {
2231 xmlGenericError(xmlGenericErrorContext,
2232 "xmlInitParserCtxt: out of memory\n");
2233 ctxt->nodeNr = 0;
2234 ctxt->nodeMax = 0;
2235 ctxt->node = NULL;
2236 ctxt->inputNr = 0;
2237 ctxt->inputMax = 0;
2238 ctxt->input = NULL;
2239 ctxt->nameNr = 0;
2240 ctxt->nameMax = 0;
2241 ctxt->name = NULL;
2242 ctxt->spaceNr = 0;
2243 ctxt->spaceMax = 0;
2244 ctxt->space = NULL;
2245 return;
2246 }
2247 ctxt->spaceNr = 1;
2248 ctxt->spaceMax = 10;
2249 ctxt->spaceTab[0] = -1;
2250 ctxt->space = &ctxt->spaceTab[0];
2251
Daniel Veillard14be0a12001-03-03 18:50:55 +00002252 ctxt->sax = sax;
Daniel Veillard8606bbb2002-11-12 12:36:52 +00002253 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Daniel Veillard14be0a12001-03-03 18:50:55 +00002254
Owen Taylor3473f882001-02-23 17:55:21 +00002255 ctxt->userData = ctxt;
2256 ctxt->myDoc = NULL;
2257 ctxt->wellFormed = 1;
2258 ctxt->valid = 1;
2259 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2260 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2261 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002262 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002263 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002264 if (ctxt->keepBlanks == 0)
2265 sax->ignorableWhitespace = ignorableWhitespace;
2266
Owen Taylor3473f882001-02-23 17:55:21 +00002267 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002268 ctxt->vctxt.error = xmlParserValidityError;
2269 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002270 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002271 if (xmlGetWarningsDefaultValue == 0)
2272 ctxt->vctxt.warning = NULL;
2273 else
2274 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002275 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 }
2277 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2278 ctxt->record_info = 0;
2279 ctxt->nbChars = 0;
2280 ctxt->checkIndex = 0;
2281 ctxt->inSubset = 0;
2282 ctxt->errNo = XML_ERR_OK;
2283 ctxt->depth = 0;
2284 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002285 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002286 xmlInitNodeInfoSeq(&ctxt->node_seq);
2287}
2288
2289/**
2290 * xmlFreeParserCtxt:
2291 * @ctxt: an XML parser context
2292 *
2293 * Free all the memory used by a parser context. However the parsed
2294 * document in ctxt->myDoc is not freed.
2295 */
2296
2297void
2298xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2299{
2300 xmlParserInputPtr input;
2301 xmlChar *oldname;
2302
2303 if (ctxt == NULL) return;
2304
2305 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2306 xmlFreeInputStream(input);
2307 }
2308 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2309 xmlFree(oldname);
2310 }
2311 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2312 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2313 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2314 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2315 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2316 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2317 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2318 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2319 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002320 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2321 xmlFree(ctxt->sax);
2322 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002323 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002324#ifdef LIBXML_CATALOG_ENABLED
2325 if (ctxt->catalogs != NULL)
2326 xmlCatalogFreeLocal(ctxt->catalogs);
2327#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002328 xmlFree(ctxt);
2329}
2330
2331/**
2332 * xmlNewParserCtxt:
2333 *
2334 * Allocate and initialize a new parser context.
2335 *
2336 * Returns the xmlParserCtxtPtr or NULL
2337 */
2338
2339xmlParserCtxtPtr
2340xmlNewParserCtxt()
2341{
2342 xmlParserCtxtPtr ctxt;
2343
2344 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2345 if (ctxt == NULL) {
2346 xmlGenericError(xmlGenericErrorContext,
2347 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002348 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002349 return(NULL);
2350 }
2351 memset(ctxt, 0, sizeof(xmlParserCtxt));
2352 xmlInitParserCtxt(ctxt);
2353 return(ctxt);
2354}
2355
/************************************************************************
 *                                                                      *
 *              Handling of node information                            *
 *                                                                      *
 ************************************************************************/
2361
2362/**
2363 * xmlClearParserCtxt:
2364 * @ctxt: an XML parser context
2365 *
2366 * Clear (release owned resources) and reinitialize a parser context
2367 */
2368
2369void
2370xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2371{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002372 if (ctxt==NULL)
2373 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002374 xmlClearNodeInfoSeq(&ctxt->node_seq);
2375 xmlInitParserCtxt(ctxt);
2376}
2377
2378/**
2379 * xmlParserFindNodeInfo:
2380 * @ctxt: an XML parser context
2381 * @node: an XML node within the tree
2382 *
2383 * Find the parser node info struct for a given node
2384 *
2385 * Returns an xmlParserNodeInfo block pointer or NULL
2386 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002387const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2388 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002389{
2390 unsigned long pos;
2391
2392 /* Find position where node should be at */
2393 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002394 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002395 return &ctx->node_seq.buffer[pos];
2396 else
2397 return NULL;
2398}
2399
2400
2401/**
2402 * xmlInitNodeInfoSeq:
2403 * @seq: a node info sequence pointer
2404 *
2405 * -- Initialize (set to initial state) node info sequence
2406 */
2407void
2408xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2409{
2410 seq->length = 0;
2411 seq->maximum = 0;
2412 seq->buffer = NULL;
2413}
2414
2415/**
2416 * xmlClearNodeInfoSeq:
2417 * @seq: a node info sequence pointer
2418 *
2419 * -- Clear (release memory and reinitialize) node
2420 * info sequence
2421 */
2422void
2423xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2424{
2425 if ( seq->buffer != NULL )
2426 xmlFree(seq->buffer);
2427 xmlInitNodeInfoSeq(seq);
2428}
2429
2430
2431/**
2432 * xmlParserFindNodeInfoIndex:
2433 * @seq: a node info sequence pointer
2434 * @node: an XML node pointer
2435 *
2436 *
2437 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2438 * the given node is or should be at in a sorted sequence
2439 *
2440 * Returns a long indicating the position of the record
2441 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002442unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2443 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002444{
2445 unsigned long upper, lower, middle;
2446 int found = 0;
2447
2448 /* Do a binary search for the key */
2449 lower = 1;
2450 upper = seq->length;
2451 middle = 0;
2452 while ( lower <= upper && !found) {
2453 middle = lower + (upper - lower) / 2;
2454 if ( node == seq->buffer[middle - 1].node )
2455 found = 1;
2456 else if ( node < seq->buffer[middle - 1].node )
2457 upper = middle - 1;
2458 else
2459 lower = middle + 1;
2460 }
2461
2462 /* Return position */
2463 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2464 return middle;
2465 else
2466 return middle - 1;
2467}
2468
2469
2470/**
2471 * xmlParserAddNodeInfo:
2472 * @ctxt: an XML parser context
2473 * @info: a node info sequence pointer
2474 *
2475 * Insert node info record into the sorted sequence
2476 */
2477void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002478xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002479 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002480{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002481 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002482
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002483 /* Find pos and check to see if node is already in the sequence */
2484 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2485 info->node);
2486 if (pos < ctxt->node_seq.length
2487 && ctxt->node_seq.buffer[pos].node == info->node) {
2488 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002489 }
2490
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002491 /* Otherwise, we need to add new node to buffer */
2492 else {
2493 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2494 xmlParserNodeInfo *tmp_buffer;
2495 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002496
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002497 if (ctxt->node_seq.maximum == 0)
2498 ctxt->node_seq.maximum = 2;
2499 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2500 (2 * ctxt->node_seq.maximum));
2501
2502 if (ctxt->node_seq.buffer == NULL)
2503 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2504 else
2505 tmp_buffer =
2506 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2507 byte_size);
2508
2509 if (tmp_buffer == NULL) {
2510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2511 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2512 ctxt->errNo = XML_ERR_NO_MEMORY;
2513 return;
2514 }
2515 ctxt->node_seq.buffer = tmp_buffer;
2516 ctxt->node_seq.maximum *= 2;
2517 }
2518
2519 /* If position is not at end, move elements out of the way */
2520 if (pos != ctxt->node_seq.length) {
2521 unsigned long i;
2522
2523 for (i = ctxt->node_seq.length; i > pos; i--)
2524 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2525 }
2526
2527 /* Copy element and increase length */
2528 ctxt->node_seq.buffer[pos] = *info;
2529 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 }
Owen Taylor3473f882001-02-23 17:55:21 +00002531}
2532
2533/************************************************************************
2534 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002535 * Defaults settings *
2536 * *
2537 ************************************************************************/
2538/**
2539 * xmlPedanticParserDefault:
2540 * @val: int 0 or 1
2541 *
2542 * Set and return the previous value for enabling pedantic warnings.
2543 *
2544 * Returns the last value for 0 for no substitution, 1 for substitution.
2545 */
2546
2547int
2548xmlPedanticParserDefault(int val) {
2549 int old = xmlPedanticParserDefaultValue;
2550
2551 xmlPedanticParserDefaultValue = val;
2552 return(old);
2553}
2554
2555/**
2556 * xmlLineNumbersDefault:
2557 * @val: int 0 or 1
2558 *
2559 * Set and return the previous value for enabling line numbers in elements
2560 * contents. This may break on old application and is turned off by default.
2561 *
2562 * Returns the last value for 0 for no substitution, 1 for substitution.
2563 */
2564
2565int
2566xmlLineNumbersDefault(int val) {
2567 int old = xmlLineNumbersDefaultValue;
2568
2569 xmlLineNumbersDefaultValue = val;
2570 return(old);
2571}
2572
2573/**
2574 * xmlSubstituteEntitiesDefault:
2575 * @val: int 0 or 1
2576 *
2577 * Set and return the previous value for default entity support.
2578 * Initially the parser always keep entity references instead of substituting
2579 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002580 * default parser behavior
2581 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002582 * file basis.
2583 *
2584 * Returns the last value for 0 for no substitution, 1 for substitution.
2585 */
2586
2587int
2588xmlSubstituteEntitiesDefault(int val) {
2589 int old = xmlSubstituteEntitiesDefaultValue;
2590
2591 xmlSubstituteEntitiesDefaultValue = val;
2592 return(old);
2593}
2594
2595/**
2596 * xmlKeepBlanksDefault:
2597 * @val: int 0 or 1
2598 *
2599 * Set and return the previous value for default blanks text nodes support.
2600 * The 1.x version of the parser used an heuristic to try to detect
2601 * ignorable white spaces. As a result the SAX callback was generating
2602 * ignorableWhitespace() callbacks instead of characters() one, and when
2603 * using the DOM output text nodes containing those blanks were not generated.
2604 * The 2.x and later version will switch to the XML standard way and
2605 * ignorableWhitespace() are only generated when running the parser in
2606 * validating mode and when the current element doesn't allow CDATA or
2607 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002608 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002609 * on 1.X libs and to switch back to the old mode for compatibility when
2610 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2611 * by using xmlIsBlankNode() commodity function to detect the "empty"
2612 * nodes generated.
2613 * This value also affect autogeneration of indentation when saving code
2614 * if blanks sections are kept, indentation is not generated.
2615 *
2616 * Returns the last value for 0 for no substitution, 1 for substitution.
2617 */
2618
2619int
2620xmlKeepBlanksDefault(int val) {
2621 int old = xmlKeepBlanksDefaultValue;
2622
2623 xmlKeepBlanksDefaultValue = val;
2624 xmlIndentTreeOutput = !val;
2625 return(old);
2626}
2627
/************************************************************************
 *                                                                      *
 *             Deprecated functions kept for compatibility              *
 *                                                                      *
 ************************************************************************/
2633
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002634/**
2635 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002636 * @lang: pointer to the string value
2637 *
2638 * Checks that the value conforms to the LanguageID production:
2639 *
2640 * NOTE: this is somewhat deprecated, those productions were removed from
2641 * the XML Second edition.
2642 *
2643 * [33] LanguageID ::= Langcode ('-' Subcode)*
2644 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2645 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2646 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2647 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2648 * [38] Subcode ::= ([a-z] | [A-Z])+
2649 *
2650 * Returns 1 if correct 0 otherwise
2651 **/
2652int
2653xmlCheckLanguageID(const xmlChar *lang) {
2654 const xmlChar *cur = lang;
2655
2656 if (cur == NULL)
2657 return(0);
2658 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2659 ((cur[0] == 'I') && (cur[1] == '-'))) {
2660 /*
2661 * IANA code
2662 */
2663 cur += 2;
2664 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2665 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2666 cur++;
2667 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2668 ((cur[0] == 'X') && (cur[1] == '-'))) {
2669 /*
2670 * User code
2671 */
2672 cur += 2;
2673 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2674 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2675 cur++;
2676 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2677 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2678 /*
2679 * ISO639
2680 */
2681 cur++;
2682 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2683 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2684 cur++;
2685 else
2686 return(0);
2687 } else
2688 return(0);
2689 while (cur[0] != 0) { /* non input consuming */
2690 if (cur[0] != '-')
2691 return(0);
2692 cur++;
2693 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2694 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2695 cur++;
2696 else
2697 return(0);
2698 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2699 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2700 cur++;
2701 }
2702 return(1);
2703}
2704
2705/**
2706 * xmlDecodeEntities:
2707 * @ctxt: the parser context
2708 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2709 * @len: the len to decode (in bytes !), -1 for no size limit
2710 * @end: an end marker xmlChar, 0 if none
2711 * @end2: an end marker xmlChar, 0 if none
2712 * @end3: an end marker xmlChar, 0 if none
2713 *
2714 * This function is deprecated, we now always process entities content
2715 * through xmlStringDecodeEntities
2716 *
2717 * TODO: remove it in next major release.
2718 *
2719 * [67] Reference ::= EntityRef | CharRef
2720 *
2721 * [69] PEReference ::= '%' Name ';'
2722 *
2723 * Returns A newly allocated string with the substitution done. The caller
2724 * must deallocate it !
2725 */
2726xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002727xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2728 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002729#if 0
2730 xmlChar *buffer = NULL;
2731 unsigned int buffer_size = 0;
2732 unsigned int nbchars = 0;
2733
2734 xmlChar *current = NULL;
2735 xmlEntityPtr ent;
2736 unsigned int max = (unsigned int) len;
2737 int c,l;
2738#endif
2739
2740 static int deprecated = 0;
2741 if (!deprecated) {
2742 xmlGenericError(xmlGenericErrorContext,
2743 "xmlDecodeEntities() deprecated function reached\n");
2744 deprecated = 1;
2745 }
2746
2747#if 0
2748 if (ctxt->depth > 40) {
2749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2750 ctxt->sax->error(ctxt->userData,
2751 "Detected entity reference loop\n");
2752 ctxt->wellFormed = 0;
2753 ctxt->disableSAX = 1;
2754 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2755 return(NULL);
2756 }
2757
2758 /*
2759 * allocate a translation buffer.
2760 */
2761 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2762 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2763 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002764 xmlGenericError(xmlGenericErrorContext,
2765 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002766 return(NULL);
2767 }
2768
2769 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002770 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002771 */
2772 GROW;
2773 c = CUR_CHAR(l);
2774 while ((nbchars < max) && (c != end) && /* NOTUSED */
2775 (c != end2) && (c != end3)) {
2776 GROW;
2777 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002778 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002779 int val = xmlParseCharRef(ctxt);
2780 COPY_BUF(0,buffer,nbchars,val);
2781 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002782 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002783 (what & XML_SUBSTITUTE_REF)) {
2784 if (xmlParserDebugEntities)
2785 xmlGenericError(xmlGenericErrorContext,
2786 "decoding Entity Reference\n");
2787 ent = xmlParseEntityRef(ctxt);
2788 if ((ent != NULL) &&
2789 (ctxt->replaceEntities != 0)) {
2790 current = ent->content;
2791 while (*current != 0) { /* non input consuming loop */
2792 buffer[nbchars++] = *current++;
2793 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2794 growBuffer(buffer);
2795 }
2796 }
2797 } else if (ent != NULL) {
2798 const xmlChar *cur = ent->name;
2799
2800 buffer[nbchars++] = '&';
2801 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2802 growBuffer(buffer);
2803 }
2804 while (*cur != 0) { /* non input consuming loop */
2805 buffer[nbchars++] = *cur++;
2806 }
2807 buffer[nbchars++] = ';';
2808 }
2809 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2810 /*
2811 * a PEReference induce to switch the entity flow,
2812 * we break here to flush the current set of chars
2813 * parsed if any. We will be called back later.
2814 */
2815 if (xmlParserDebugEntities)
2816 xmlGenericError(xmlGenericErrorContext,
2817 "decoding PE Reference\n");
2818 if (nbchars != 0) break;
2819
2820 xmlParsePEReference(ctxt);
2821
2822 /*
2823 * Pop-up of finished entities.
2824 */
2825 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2826 xmlPopInput(ctxt);
2827
2828 break;
2829 } else {
2830 COPY_BUF(l,buffer,nbchars,c);
2831 NEXTL(l);
2832 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2833 growBuffer(buffer);
2834 }
2835 }
2836 c = CUR_CHAR(l);
2837 }
2838 buffer[nbchars++] = 0;
2839 return(buffer);
2840#endif
2841 return(NULL);
2842}
2843
2844/**
2845 * xmlNamespaceParseNCName:
2846 * @ctxt: an XML parser context
2847 *
2848 * parse an XML namespace name.
2849 *
2850 * TODO: this seems not in use anymore, the namespace handling is done on
2851 * top of the SAX interfaces, i.e. not on raw input.
2852 *
2853 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2854 *
2855 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2856 * CombiningChar | Extender
2857 *
2858 * Returns the namespace name or NULL
2859 */
2860
2861xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002862xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002863#if 0
2864 xmlChar buf[XML_MAX_NAMELEN + 5];
2865 int len = 0, l;
2866 int cur = CUR_CHAR(l);
2867#endif
2868
2869 static int deprecated = 0;
2870 if (!deprecated) {
2871 xmlGenericError(xmlGenericErrorContext,
2872 "xmlNamespaceParseNCName() deprecated function reached\n");
2873 deprecated = 1;
2874 }
2875
2876#if 0
2877 /* load first the value of the char !!! */
2878 GROW;
2879 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2880
2881xmlGenericError(xmlGenericErrorContext,
2882 "xmlNamespaceParseNCName: reached loop 3\n");
2883 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2884 (cur == '.') || (cur == '-') ||
2885 (cur == '_') ||
2886 (IS_COMBINING(cur)) ||
2887 (IS_EXTENDER(cur))) {
2888 COPY_BUF(l,buf,len,cur);
2889 NEXTL(l);
2890 cur = CUR_CHAR(l);
2891 if (len >= XML_MAX_NAMELEN) {
2892 xmlGenericError(xmlGenericErrorContext,
2893 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2894 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2895 (cur == '.') || (cur == '-') ||
2896 (cur == '_') ||
2897 (IS_COMBINING(cur)) ||
2898 (IS_EXTENDER(cur))) {
2899 NEXTL(l);
2900 cur = CUR_CHAR(l);
2901 }
2902 break;
2903 }
2904 }
2905 return(xmlStrndup(buf, len));
2906#endif
2907 return(NULL);
2908}
2909
2910/**
2911 * xmlNamespaceParseQName:
2912 * @ctxt: an XML parser context
2913 * @prefix: a xmlChar **
2914 *
2915 * TODO: this seems not in use anymore, the namespace handling is done on
2916 * top of the SAX interfaces, i.e. not on raw input.
2917 *
2918 * parse an XML qualified name
2919 *
2920 * [NS 5] QName ::= (Prefix ':')? LocalPart
2921 *
2922 * [NS 6] Prefix ::= NCName
2923 *
2924 * [NS 7] LocalPart ::= NCName
2925 *
2926 * Returns the local part, and prefix is updated
2927 * to get the Prefix if any.
2928 */
2929
2930xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002931xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002932
2933 static int deprecated = 0;
2934 if (!deprecated) {
2935 xmlGenericError(xmlGenericErrorContext,
2936 "xmlNamespaceParseQName() deprecated function reached\n");
2937 deprecated = 1;
2938 }
2939
2940#if 0
2941 xmlChar *ret = NULL;
2942
2943 *prefix = NULL;
2944 ret = xmlNamespaceParseNCName(ctxt);
2945 if (RAW == ':') {
2946 *prefix = ret;
2947 NEXT;
2948 ret = xmlNamespaceParseNCName(ctxt);
2949 }
2950
2951 return(ret);
2952#endif
2953 return(NULL);
2954}
2955
2956/**
2957 * xmlNamespaceParseNSDef:
2958 * @ctxt: an XML parser context
2959 *
2960 * parse a namespace prefix declaration
2961 *
2962 * TODO: this seems not in use anymore, the namespace handling is done on
2963 * top of the SAX interfaces, i.e. not on raw input.
2964 *
2965 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2966 *
2967 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2968 *
2969 * Returns the namespace name
2970 */
2971
2972xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002973xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002974 static int deprecated = 0;
2975 if (!deprecated) {
2976 xmlGenericError(xmlGenericErrorContext,
2977 "xmlNamespaceParseNSDef() deprecated function reached\n");
2978 deprecated = 1;
2979 }
2980 return(NULL);
2981#if 0
2982 xmlChar *name = NULL;
2983
2984 if ((RAW == 'x') && (NXT(1) == 'm') &&
2985 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2986 (NXT(4) == 's')) {
2987 SKIP(5);
2988 if (RAW == ':') {
2989 NEXT;
2990 name = xmlNamespaceParseNCName(ctxt);
2991 }
2992 }
2993 return(name);
2994#endif
2995}
2996
2997/**
2998 * xmlParseQuotedString:
2999 * @ctxt: an XML parser context
3000 *
3001 * Parse and return a string between quotes or doublequotes
3002 *
3003 * TODO: Deprecated, to be removed at next drop of binary compatibility
3004 *
3005 * Returns the string parser or NULL.
3006 */
3007xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003008xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003009 static int deprecated = 0;
3010 if (!deprecated) {
3011 xmlGenericError(xmlGenericErrorContext,
3012 "xmlParseQuotedString() deprecated function reached\n");
3013 deprecated = 1;
3014 }
3015 return(NULL);
3016
3017#if 0
3018 xmlChar *buf = NULL;
3019 int len = 0,l;
3020 int size = XML_PARSER_BUFFER_SIZE;
3021 int c;
3022
3023 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3024 if (buf == NULL) {
3025 xmlGenericError(xmlGenericErrorContext,
3026 "malloc of %d byte failed\n", size);
3027 return(NULL);
3028 }
3029xmlGenericError(xmlGenericErrorContext,
3030 "xmlParseQuotedString: reached loop 4\n");
3031 if (RAW == '"') {
3032 NEXT;
3033 c = CUR_CHAR(l);
3034 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3035 if (len + 5 >= size) {
3036 size *= 2;
3037 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3038 if (buf == NULL) {
3039 xmlGenericError(xmlGenericErrorContext,
3040 "realloc of %d byte failed\n", size);
3041 return(NULL);
3042 }
3043 }
3044 COPY_BUF(l,buf,len,c);
3045 NEXTL(l);
3046 c = CUR_CHAR(l);
3047 }
3048 if (c != '"') {
3049 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3051 ctxt->sax->error(ctxt->userData,
3052 "String not closed \"%.50s\"\n", buf);
3053 ctxt->wellFormed = 0;
3054 ctxt->disableSAX = 1;
3055 } else {
3056 NEXT;
3057 }
3058 } else if (RAW == '\''){
3059 NEXT;
3060 c = CUR;
3061 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3062 if (len + 1 >= size) {
3063 size *= 2;
3064 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3065 if (buf == NULL) {
3066 xmlGenericError(xmlGenericErrorContext,
3067 "realloc of %d byte failed\n", size);
3068 return(NULL);
3069 }
3070 }
3071 buf[len++] = c;
3072 NEXT;
3073 c = CUR;
3074 }
3075 if (RAW != '\'') {
3076 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3078 ctxt->sax->error(ctxt->userData,
3079 "String not closed \"%.50s\"\n", buf);
3080 ctxt->wellFormed = 0;
3081 ctxt->disableSAX = 1;
3082 } else {
3083 NEXT;
3084 }
3085 }
3086 return(buf);
3087#endif
3088}
3089
3090/**
3091 * xmlParseNamespace:
3092 * @ctxt: an XML parser context
3093 *
3094 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3095 *
3096 * This is what the older xml-name Working Draft specified, a bunch of
3097 * other stuff may still rely on it, so support is still here as
3098 * if it was declared on the root of the Tree:-(
3099 *
3100 * TODO: remove from library
3101 *
3102 * To be removed at next drop of binary compatibility
3103 */
3104
3105void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003106xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003107 static int deprecated = 0;
3108 if (!deprecated) {
3109 xmlGenericError(xmlGenericErrorContext,
3110 "xmlParseNamespace() deprecated function reached\n");
3111 deprecated = 1;
3112 }
3113
3114#if 0
3115 xmlChar *href = NULL;
3116 xmlChar *prefix = NULL;
3117 int garbage = 0;
3118
3119 /*
3120 * We just skipped "namespace" or "xml:namespace"
3121 */
3122 SKIP_BLANKS;
3123
3124xmlGenericError(xmlGenericErrorContext,
3125 "xmlParseNamespace: reached loop 5\n");
3126 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3127 /*
3128 * We can have "ns" or "prefix" attributes
3129 * Old encoding as 'href' or 'AS' attributes is still supported
3130 */
3131 if ((RAW == 'n') && (NXT(1) == 's')) {
3132 garbage = 0;
3133 SKIP(2);
3134 SKIP_BLANKS;
3135
3136 if (RAW != '=') continue;
3137 NEXT;
3138 SKIP_BLANKS;
3139
3140 href = xmlParseQuotedString(ctxt);
3141 SKIP_BLANKS;
3142 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3143 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3144 garbage = 0;
3145 SKIP(4);
3146 SKIP_BLANKS;
3147
3148 if (RAW != '=') continue;
3149 NEXT;
3150 SKIP_BLANKS;
3151
3152 href = xmlParseQuotedString(ctxt);
3153 SKIP_BLANKS;
3154 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3155 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3156 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3157 garbage = 0;
3158 SKIP(6);
3159 SKIP_BLANKS;
3160
3161 if (RAW != '=') continue;
3162 NEXT;
3163 SKIP_BLANKS;
3164
3165 prefix = xmlParseQuotedString(ctxt);
3166 SKIP_BLANKS;
3167 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3168 garbage = 0;
3169 SKIP(2);
3170 SKIP_BLANKS;
3171
3172 if (RAW != '=') continue;
3173 NEXT;
3174 SKIP_BLANKS;
3175
3176 prefix = xmlParseQuotedString(ctxt);
3177 SKIP_BLANKS;
3178 } else if ((RAW == '?') && (NXT(1) == '>')) {
3179 garbage = 0;
3180 NEXT;
3181 } else {
3182 /*
3183 * Found garbage when parsing the namespace
3184 */
3185 if (!garbage) {
3186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3187 ctxt->sax->error(ctxt->userData,
3188 "xmlParseNamespace found garbage\n");
3189 }
3190 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3191 ctxt->wellFormed = 0;
3192 ctxt->disableSAX = 1;
3193 NEXT;
3194 }
3195 }
3196
3197 MOVETO_ENDTAG(CUR_PTR);
3198 NEXT;
3199
3200 /*
3201 * Register the DTD.
3202 if (href != NULL)
3203 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3204 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3205 */
3206
3207 if (prefix != NULL) xmlFree(prefix);
3208 if (href != NULL) xmlFree(href);
3209#endif
3210}
3211
3212/**
3213 * xmlScanName:
3214 * @ctxt: an XML parser context
3215 *
3216 * Trickery: parse an XML name but without consuming the input flow
3217 * Needed for rollback cases. Used only when parsing entities references.
3218 *
3219 * TODO: seems deprecated now, only used in the default part of
3220 * xmlParserHandleReference
3221 *
3222 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3223 * CombiningChar | Extender
3224 *
3225 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3226 *
3227 * [6] Names ::= Name (S Name)*
3228 *
3229 * Returns the Name parsed or NULL
3230 */
3231
3232xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003233xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003234 static int deprecated = 0;
3235 if (!deprecated) {
3236 xmlGenericError(xmlGenericErrorContext,
3237 "xmlScanName() deprecated function reached\n");
3238 deprecated = 1;
3239 }
3240 return(NULL);
3241
3242#if 0
3243 xmlChar buf[XML_MAX_NAMELEN];
3244 int len = 0;
3245
3246 GROW;
3247 if (!IS_LETTER(RAW) && (RAW != '_') &&
3248 (RAW != ':')) {
3249 return(NULL);
3250 }
3251
3252
3253 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3254 (NXT(len) == '.') || (NXT(len) == '-') ||
3255 (NXT(len) == '_') || (NXT(len) == ':') ||
3256 (IS_COMBINING(NXT(len))) ||
3257 (IS_EXTENDER(NXT(len)))) {
3258 GROW;
3259 buf[len] = NXT(len);
3260 len++;
3261 if (len >= XML_MAX_NAMELEN) {
3262 xmlGenericError(xmlGenericErrorContext,
3263 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3264 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3265 (IS_DIGIT(NXT(len))) ||
3266 (NXT(len) == '.') || (NXT(len) == '-') ||
3267 (NXT(len) == '_') || (NXT(len) == ':') ||
3268 (IS_COMBINING(NXT(len))) ||
3269 (IS_EXTENDER(NXT(len))))
3270 len++;
3271 break;
3272 }
3273 }
3274 return(xmlStrndup(buf, len));
3275#endif
3276}
3277
3278/**
3279 * xmlParserHandleReference:
3280 * @ctxt: the parser context
3281 *
3282 * TODO: Remove, now deprecated ... the test is done directly in the
3283 * content parsing
3284 * routines.
3285 *
3286 * [67] Reference ::= EntityRef | CharRef
3287 *
3288 * [68] EntityRef ::= '&' Name ';'
3289 *
3290 * [ WFC: Entity Declared ]
3291 * the Name given in the entity reference must match that in an entity
3292 * declaration, except that well-formed documents need not declare any
3293 * of the following entities: amp, lt, gt, apos, quot.
3294 *
3295 * [ WFC: Parsed Entity ]
3296 * An entity reference must not contain the name of an unparsed entity
3297 *
3298 * [66] CharRef ::= '&#' [0-9]+ ';' |
3299 * '&#x' [0-9a-fA-F]+ ';'
3300 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003301 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003302 * the handling is done accordingly to
3303 * http://www.w3.org/TR/REC-xml#entproc
3304 */
3305void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003306xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003307 static int deprecated = 0;
3308 if (!deprecated) {
3309 xmlGenericError(xmlGenericErrorContext,
3310 "xmlParserHandleReference() deprecated function reached\n");
3311 deprecated = 1;
3312 }
3313
Owen Taylor3473f882001-02-23 17:55:21 +00003314 return;
3315}
3316
3317/**
3318 * xmlHandleEntity:
3319 * @ctxt: an XML parser context
3320 * @entity: an XML entity pointer.
3321 *
3322 * Default handling of defined entities, when should we define a new input
3323 * stream ? When do we just handle that as a set of chars ?
3324 *
3325 * OBSOLETE: to be removed at some point.
3326 */
3327
3328void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003329xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003330 static int deprecated = 0;
3331 if (!deprecated) {
3332 xmlGenericError(xmlGenericErrorContext,
3333 "xmlHandleEntity() deprecated function reached\n");
3334 deprecated = 1;
3335 }
3336
3337#if 0
3338 int len;
3339 xmlParserInputPtr input;
3340
3341 if (entity->content == NULL) {
3342 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3345 entity->name);
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 return;
3349 }
3350 len = xmlStrlen(entity->content);
3351 if (len <= 2) goto handle_as_char;
3352
3353 /*
3354 * Redefine its content as an input stream.
3355 */
3356 input = xmlNewEntityInputStream(ctxt, entity);
3357 xmlPushInput(ctxt, input);
3358 return;
3359
3360handle_as_char:
3361 /*
3362 * Just handle the content as a set of chars.
3363 */
3364 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3365 (ctxt->sax->characters != NULL))
3366 ctxt->sax->characters(ctxt->userData, entity->content, len);
3367#endif
3368}
3369
3370/**
3371 * xmlNewGlobalNs:
3372 * @doc: the document carrying the namespace
3373 * @href: the URI associated
3374 * @prefix: the prefix for the namespace
3375 *
3376 * Creation of a Namespace, the old way using PI and without scoping
3377 * DEPRECATED !!!
3378 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003379 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003380 */
3381xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003382xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3383 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003384 static int deprecated = 0;
3385 if (!deprecated) {
3386 xmlGenericError(xmlGenericErrorContext,
3387 "xmlNewGlobalNs() deprecated function reached\n");
3388 deprecated = 1;
3389 }
3390 return(NULL);
3391#if 0
3392 xmlNodePtr root;
3393
3394 xmlNsPtr cur;
3395
3396 root = xmlDocGetRootElement(doc);
3397 if (root != NULL)
3398 return(xmlNewNs(root, href, prefix));
3399
3400 /*
3401 * if there is no root element yet, create an old Namespace type
3402 * and it will be moved to the root at save time.
3403 */
3404 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3405 if (cur == NULL) {
3406 xmlGenericError(xmlGenericErrorContext,
3407 "xmlNewGlobalNs : malloc failed\n");
3408 return(NULL);
3409 }
3410 memset(cur, 0, sizeof(xmlNs));
3411 cur->type = XML_GLOBAL_NAMESPACE;
3412
3413 if (href != NULL)
3414 cur->href = xmlStrdup(href);
3415 if (prefix != NULL)
3416 cur->prefix = xmlStrdup(prefix);
3417
3418 /*
3419 * Add it at the end to preserve parsing order ...
3420 */
3421 if (doc != NULL) {
3422 if (doc->oldNs == NULL) {
3423 doc->oldNs = cur;
3424 } else {
3425 xmlNsPtr prev = doc->oldNs;
3426
3427 while (prev->next != NULL) prev = prev->next;
3428 prev->next = cur;
3429 }
3430 }
3431
3432 return(NULL);
3433#endif
3434}
3435
3436/**
3437 * xmlUpgradeOldNs:
3438 * @doc: a document pointer
3439 *
3440 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3441 * DEPRECATED
3442 */
3443void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003444xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003445 static int deprecated = 0;
3446 if (!deprecated) {
3447 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003448 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003449 deprecated = 1;
3450 }
3451#if 0
3452 xmlNsPtr cur;
3453
3454 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3455 if (doc->children == NULL) {
3456#ifdef DEBUG_TREE
3457 xmlGenericError(xmlGenericErrorContext,
3458 "xmlUpgradeOldNs: failed no root !\n");
3459#endif
3460 return;
3461 }
3462
3463 cur = doc->oldNs;
3464 while (cur->next != NULL) {
3465 cur->type = XML_LOCAL_NAMESPACE;
3466 cur = cur->next;
3467 }
3468 cur->type = XML_LOCAL_NAMESPACE;
3469 cur->next = doc->children->nsDef;
3470 doc->children->nsDef = doc->oldNs;
3471 doc->oldNs = NULL;
3472#endif
3473}
3474