blob: e74801fa456f7840a8bbee995cb92b622fc17236 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
Daniel Veillarda53c6882001-07-25 17:18:57 +000058/*
59 * Various global defaults for parsing
60 */
Owen Taylor3473f882001-02-23 17:55:21 +000061
Daniel Veillard5e2dace2001-07-18 19:30:27 +000062/**
Owen Taylor3473f882001-02-23 17:55:21 +000063 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
72
Daniel Veillard6f350292001-10-14 09:56:15 +000073 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000074
Owen Taylor3473f882001-02-23 17:55:21 +000075 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000079 fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000082 }
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
87 }
88}
89
90
/*
 * Names of the parser features that can be enumerated through
 * xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names stored in it
 * @result:  array receiving pointers to the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * The stored pointers refer to static strings and must not be freed.
 *
 * When @len or @result is NULL nothing is copied and only the total
 * is returned.
 *
 * Returns the total number of known features, or -1 when *@len is
 * negative or not below 1000.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    if ((*len < 0) || (*len >= 1000))
        return(-1);

    /* never hand out more names than the table holds */
    if (*len > total)
        *len = total;

    idx = 0;
    while (idx < *len) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
162
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163/**
Owen Taylor3473f882001-02-23 17:55:21 +0000164 * xmlGetFeature:
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
168 *
169 * Read the current value of one feature of this parser instance
170 *
171 * Returns -1 in case or error, 0 otherwise
172 */
173int
174xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
176 return(-1);
177
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
258 } else {
259 return(-1);
260 }
261 return(0);
262}
263
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000264/**
Owen Taylor3473f882001-02-23 17:55:21 +0000265 * xmlSetFeature:
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
269 *
270 * Change the current value of one feature of this parser instance
271 *
272 * Returns -1 in case or error, 0 otherwise
273 */
274int
275xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
277 return(-1);
278
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000286 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000287 }
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
367 } else {
368 return(-1);
369 }
370 return(0);
371}
372
373/************************************************************************
374 * *
375 * Some functions to avoid too large macros *
376 * *
377 ************************************************************************/
378
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 1 if @c matches the production, 0 otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are allowed */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF */
    if (c < 0x10000)
        return(0);
    return(c <= 0x10FFFF);
}
399
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the single-character alternatives of the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 1 if @c is one of the four blank characters, 0 otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
414
/*
 * Direct lookup table for the code points 0x0000 - 0x00FF of the
 * BaseChar production: 1 when the code point is a BaseChar, 0 otherwise.
 */
static const int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; larger
 * ones are looked up by binary search in a table of inclusive ranges.
 * Negative values are never a BaseChar and are rejected up front so
 * that they cannot index xmlBaseArray out of bounds.
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsBaseChar(int c) {
    /*
     * Inclusive [first, last] ranges at or above 0x0100.
     * The entries MUST stay sorted and non-overlapping: the binary
     * search below depends on it.
     */
    static const struct {
        int first;
        int last;
    } ranges[] = {
        { 0x0100, 0x0131 }, { 0x0134, 0x013E }, { 0x0141, 0x0148 },
        { 0x014A, 0x017E }, { 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 },
        { 0x01F4, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
        { 0x02BB, 0x02C1 }, { 0x0386, 0x0386 }, { 0x0388, 0x038A },
        { 0x038C, 0x038C }, { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE },
        { 0x03D0, 0x03D6 }, { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC },
        { 0x03DE, 0x03DE }, { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
        { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
        { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
        { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
        { 0x04F8, 0x04F9 }, { 0x0531, 0x0556 }, { 0x0559, 0x0559 },
        { 0x0561, 0x0586 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
        { 0x0621, 0x063A }, { 0x0641, 0x064A }, { 0x0671, 0x06B7 },
        { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 },
        { 0x06D5, 0x06D5 }, { 0x06E5, 0x06E6 },
        { 0x0905, 0x0939 }, { 0x093D, 0x093D }, { 0x0958, 0x0961 },
        { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
        { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
        { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
        { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
        { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
        { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
        { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
        { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
        { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0ABD },
        { 0x0AE0, 0x0AE0 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
        { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
        { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D }, { 0x0B5C, 0x0B5D },
        { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
        { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
        { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
        { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0C05, 0x0C0C },
        { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 },
        { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 }, { 0x0C85, 0x0C8C },
        { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 },
        { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
        { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
        { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 }, { 0x0E01, 0x0E2E },
        { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E45 },
        { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
        { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
        { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
        { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
        { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 }, { 0x0EBD, 0x0EBD },
        { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 }, { 0x0F49, 0x0F69 },
        { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, { 0x1100, 0x1100 },
        { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109, 0x1109 },
        { 0x110B, 0x110C }, { 0x110E, 0x1112 }, { 0x113C, 0x113C },
        { 0x113E, 0x113E }, { 0x1140, 0x1140 }, { 0x114C, 0x114C },
        { 0x114E, 0x114E }, { 0x1150, 0x1150 }, { 0x1154, 0x1155 },
        { 0x1159, 0x1159 }, { 0x115F, 0x1161 }, { 0x1163, 0x1163 },
        { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 },
        { 0x116D, 0x116E }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 },
        { 0x119E, 0x119E }, { 0x11A8, 0x11A8 }, { 0x11AB, 0x11AB },
        { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 }, { 0x11BA, 0x11BA },
        { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB }, { 0x11F0, 0x11F0 },
        { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
        { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
        { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
        { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
        { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FBE, 0x1FBE },
        { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
        { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
        { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 }, { 0x212A, 0x212B },
        { 0x212E, 0x212E }, { 0x2180, 0x2182 }, { 0x3041, 0x3094 },
        { 0x30A1, 0x30FA }, { 0x3105, 0x312C }, { 0xAC00, 0xD7A3 }
    };
    int low, high, mid;

    /* a negative value would index xmlBaseArray before its start */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(ranges) / sizeof(ranges[0])) - 1;
    while (low <= high) {
        mid = low + (high - low) / 2;
        if (c < ranges[mid].first)
            high = mid - 1;
        else if (c > ranges[mid].last)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
653
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 1 if @c falls in one of the digit ranges, 0 otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] digit ranges, in ascending order */
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 }
    };
    const int count =
        (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
683
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Test @c against the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 1 if @c falls in one of the combining ranges, 0 otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive [first, last] ranges, in ascending order */
    static const int combRanges[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },
        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },
        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },
        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    const int count =
        (int) (sizeof(combRanges) / sizeof(combRanges[0]));
    int idx;

    /* nothing below the first range can match */
    if (c < 0x0300)
        return(0);

    for (idx = 0; idx < count; idx++) {
        if ((c >= combRanges[idx][0]) && (c <= combRanges[idx][1]))
            return(1);
    }
    return(0);
}
796
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Test the character against the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c matches the production, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous ranges of the production */
    if (((c >= 0x3031) && (c <= 0x3035)) ||
        ((c >= 0x309D) && (c <= 0x309E)) ||
        ((c >= 0x30FC) && (c <= 0x30FE)))
        return(1);

    /* the isolated code points */
    return((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
           (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
           (c == 0x0EC6) || (c == 0x3005));
}
821
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * The CJK compatibility range [#xF900-#xFA2D] is deliberately NOT
 * accepted: it is not part of production [86], and XML 1.0 Appendix B
 * excludes characters of the compatibility area from XML names.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick exit: nothing below 0x0100 is ideographic */
    if (c < 0x0100)
        return(0);

    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           ((c >= 0x3021) && (c <= 0x3029)) ||
           (c == 0x3007));
}
839
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Test the character against the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first, Ideographic only when that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
853
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Test the character against the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the three whitespace characters and the punctuation set */
    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
876
877/************************************************************************
878 * *
879 * Input handling functions for progressive parsing *
880 * *
881 ************************************************************************/
882
883/* #define DEBUG_INPUT */
884/* #define DEBUG_STACK */
885/* #define DEBUG_PUSH */
886
887
888/* we need to keep enough input to show errors in context */
889#define LINE_LEN 80
890
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check, compiled in when DEBUG_INPUT is defined.
 * Reports through xmlGenericError when the base pointer no longer matches
 * the buffer content or when cur lies outside [base, base + use], then
 * dumps the current state of the input.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates the
     * value on platforms where pointers are wider than int. The pointer
     * difference is converted explicitly so it matches %d.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
915
916
917/**
918 * xmlParserInputRead:
919 * @in: an XML parser input
920 * @len: an indicative size for the lookahead
921 *
922 * This function refresh the input for the parser. It doesn't try to
923 * preserve pointers to the input buffer, and discard already read data
924 *
925 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
926 * end of this entity
927 */
928int
929xmlParserInputRead(xmlParserInputPtr in, int len) {
930 int ret;
931 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000932 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000933
934#ifdef DEBUG_INPUT
935 xmlGenericError(xmlGenericErrorContext, "Read\n");
936#endif
937 if (in->buf == NULL) return(-1);
938 if (in->base == NULL) return(-1);
939 if (in->cur == NULL) return(-1);
940 if (in->buf->buffer == NULL) return(-1);
941 if (in->buf->readcallback == NULL) return(-1);
942
943 CHECK_BUFFER(in);
944
945 used = in->cur - in->buf->buffer->content;
946 ret = xmlBufferShrink(in->buf->buffer, used);
947 if (ret > 0) {
948 in->cur -= ret;
949 in->consumed += ret;
950 }
951 ret = xmlParserInputBufferRead(in->buf, len);
952 if (in->base != in->buf->buffer->content) {
953 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000954 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000955 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000956 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000957 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000959 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000960 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000961
962 CHECK_BUFFER(in);
963
964 return(ret);
965}
966
967/**
968 * xmlParserInputGrow:
969 * @in: an XML parser input
970 * @len: an indicative size for the lookahead
971 *
972 * This function increase the input for the parser. It tries to
973 * preserve pointers to the input buffer, and keep already read data
974 *
975 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
976 * end of this entity
977 */
978int
979xmlParserInputGrow(xmlParserInputPtr in, int len) {
980 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000981 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000982
983#ifdef DEBUG_INPUT
984 xmlGenericError(xmlGenericErrorContext, "Grow\n");
985#endif
986 if (in->buf == NULL) return(-1);
987 if (in->base == NULL) return(-1);
988 if (in->cur == NULL) return(-1);
989 if (in->buf->buffer == NULL) return(-1);
990
991 CHECK_BUFFER(in);
992
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000993 indx = in->cur - in->base;
994 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000995
996 CHECK_BUFFER(in);
997
998 return(0);
999 }
1000 if (in->buf->readcallback != NULL)
1001 ret = xmlParserInputBufferGrow(in->buf, len);
1002 else
1003 return(0);
1004
1005 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001006 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001007 * block, but we use it really as an integer to do some
1008 * pointer arithmetic. Insure will raise it as a bug but in
1009 * that specific case, that's not !
1010 */
1011 if (in->base != in->buf->buffer->content) {
1012 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001013 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001014 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001015 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001016 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001018 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001019 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001020
1021 CHECK_BUFFER(in);
1022
1023 return(ret);
1024}
1025
1026/**
1027 * xmlParserInputShrink:
1028 * @in: an XML parser input
1029 *
1030 * This function removes used input for the parser.
1031 */
1032void
1033xmlParserInputShrink(xmlParserInputPtr in) {
1034 int used;
1035 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001036 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001037
1038#ifdef DEBUG_INPUT
1039 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1040#endif
1041 if (in->buf == NULL) return;
1042 if (in->base == NULL) return;
1043 if (in->cur == NULL) return;
1044 if (in->buf->buffer == NULL) return;
1045
1046 CHECK_BUFFER(in);
1047
1048 used = in->cur - in->buf->buffer->content;
1049 /*
1050 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001051 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001052 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001053 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001054 return;
1055 if (used > INPUT_CHUNK) {
1056 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1057 if (ret > 0) {
1058 in->cur -= ret;
1059 in->consumed += ret;
1060 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001061 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001062 }
1063
1064 CHECK_BUFFER(in);
1065
1066 if (in->buf->buffer->use > INPUT_CHUNK) {
1067 return;
1068 }
1069 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1070 if (in->base != in->buf->buffer->content) {
1071 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001072 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001073 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001074 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001075 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001076 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001077 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001078 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001079
1080 CHECK_BUFFER(in);
1081}
1082
1083/************************************************************************
1084 * *
1085 * UTF8 character input and related functions *
1086 * *
1087 ************************************************************************/
1088
1089/**
1090 * xmlNextChar:
1091 * @ctxt: the XML parser context
1092 *
1093 * Skip to the next char input char.
1094 */
1095
1096void
1097xmlNextChar(xmlParserCtxtPtr ctxt) {
1098 if (ctxt->instate == XML_PARSER_EOF)
1099 return;
1100
1101 /*
1102 * 2.11 End-of-Line Handling
1103 * the literal two-character sequence "#xD#xA" or a standalone
1104 * literal #xD, an XML processor must pass to the application
1105 * the single character #xA.
1106 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001107 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001108 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001109 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1110 (ctxt->instate != XML_PARSER_COMMENT)) {
1111 /*
1112 * If we are at the end of the current entity and
1113 * the context allows it, we pop consumed entities
1114 * automatically.
1115 * the auto closing should be blocked in other cases
1116 */
1117 xmlPopInput(ctxt);
1118 } else {
1119 if (*(ctxt->input->cur) == '\n') {
1120 ctxt->input->line++; ctxt->input->col = 1;
1121 } else ctxt->input->col++;
1122 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1123 /*
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1126 *
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131 *
1132 * Check for the 0x110000 limit too
1133 */
1134 const unsigned char *cur = ctxt->input->cur;
1135 unsigned char c;
1136
1137 c = *cur;
1138 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001139 if (cur[1] == 0)
1140 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1141 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001142 goto encoding_error;
1143 if ((c & 0xe0) == 0xe0) {
1144 unsigned int val;
1145
Daniel Veillard561b7f82002-03-20 21:55:57 +00001146 if (cur[2] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001149 goto encoding_error;
1150 if ((c & 0xf0) == 0xf0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001151 if (cur[3] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001154 ((cur[3] & 0xc0) != 0x80))
1155 goto encoding_error;
1156 /* 4-byte code */
1157 ctxt->input->cur += 4;
1158 val = (cur[0] & 0x7) << 18;
1159 val |= (cur[1] & 0x3f) << 12;
1160 val |= (cur[2] & 0x3f) << 6;
1161 val |= cur[3] & 0x3f;
1162 } else {
1163 /* 3-byte code */
1164 ctxt->input->cur += 3;
1165 val = (cur[0] & 0xf) << 12;
1166 val |= (cur[1] & 0x3f) << 6;
1167 val |= cur[2] & 0x3f;
1168 }
1169 if (((val > 0xd7ff) && (val < 0xe000)) ||
1170 ((val > 0xfffd) && (val < 0x10000)) ||
1171 (val >= 0x110000)) {
1172 if ((ctxt->sax != NULL) &&
1173 (ctxt->sax->error != NULL))
1174 ctxt->sax->error(ctxt->userData,
1175 "Char 0x%X out of allowed range\n", val);
1176 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1177 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001178 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001179 }
1180 } else
1181 /* 2-byte code */
1182 ctxt->input->cur += 2;
1183 } else
1184 /* 1-byte code */
1185 ctxt->input->cur++;
1186 } else {
1187 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001188 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001189 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001190 * XML constructs only use < 128 chars
1191 */
1192 ctxt->input->cur++;
1193 }
1194 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001197 }
1198 } else {
1199 ctxt->input->cur++;
1200 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001201 if (*ctxt->input->cur == 0)
Owen Taylor3473f882001-02-23 17:55:21 +00001202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001204 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Owen Taylor3473f882001-02-23 17:55:21 +00001205 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001206 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1208 xmlPopInput(ctxt);
1209 return;
1210encoding_error:
1211 /*
1212 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001213 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001214 * declaration header. Report the error and switch the encoding
1215 * to ISO-Latin-1 (if you don't like this policy, just declare the
1216 * encoding !)
1217 */
1218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1219 ctxt->sax->error(ctxt->userData,
1220 "Input is not proper UTF-8, indicate encoding !\n");
1221 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001222 ctxt->input->cur[0], ctxt->input->cur[1],
1223 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001224 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001225 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001226 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1227
1228 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001229 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001230 return;
1231}
1232
1233/**
1234 * xmlCurrentChar:
1235 * @ctxt: the XML parser context
1236 * @len: pointer to the length of the char read
1237 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001238 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001239 * bytes in the input buffer. Implement the end of line normalization:
1240 * 2.11 End-of-Line Handling
1241 * Wherever an external parsed entity or the literal entity value
1242 * of an internal parsed entity contains either the literal two-character
1243 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1244 * must pass to the application the single character #xA.
1245 * This behavior can conveniently be produced by normalizing all
1246 * line breaks to #xA on input, before parsing.)
1247 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001248 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001249 */
1250
1251int
1252xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1253 if (ctxt->instate == XML_PARSER_EOF)
1254 return(0);
1255
Daniel Veillard561b7f82002-03-20 21:55:57 +00001256 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1257 *len = 1;
1258 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1261 /*
1262 * We are supposed to handle UTF8, check it's valid
1263 * From rfc2044: encoding of the Unicode values on UTF-8:
1264 *
1265 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1266 * 0000 0000-0000 007F 0xxxxxxx
1267 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1268 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1269 *
1270 * Check for the 0x110000 limit too
1271 */
1272 const unsigned char *cur = ctxt->input->cur;
1273 unsigned char c;
1274 unsigned int val;
1275
1276 c = *cur;
1277 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001278 if (cur[1] == 0)
1279 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1280 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001281 goto encoding_error;
1282 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001283
1284 if (cur[2] == 0)
1285 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1286 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001287 goto encoding_error;
1288 if ((c & 0xf0) == 0xf0) {
1289 if (cur[3] == 0)
1290 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001291 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001292 ((cur[3] & 0xc0) != 0x80))
1293 goto encoding_error;
1294 /* 4-byte code */
1295 *len = 4;
1296 val = (cur[0] & 0x7) << 18;
1297 val |= (cur[1] & 0x3f) << 12;
1298 val |= (cur[2] & 0x3f) << 6;
1299 val |= cur[3] & 0x3f;
1300 } else {
1301 /* 3-byte code */
1302 *len = 3;
1303 val = (cur[0] & 0xf) << 12;
1304 val |= (cur[1] & 0x3f) << 6;
1305 val |= cur[2] & 0x3f;
1306 }
1307 } else {
1308 /* 2-byte code */
1309 *len = 2;
1310 val = (cur[0] & 0x1f) << 6;
1311 val |= cur[1] & 0x3f;
1312 }
1313 if (!IS_CHAR(val)) {
1314 if ((ctxt->sax != NULL) &&
1315 (ctxt->sax->error != NULL))
1316 ctxt->sax->error(ctxt->userData,
1317 "Char 0x%X out of allowed range\n", val);
1318 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1319 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001320 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001321 }
1322 return(val);
1323 } else {
1324 /* 1-byte code */
1325 *len = 1;
1326 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001327 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001328 ctxt->nbChars++;
1329 ctxt->input->cur++;
1330 }
1331 return(0xA);
1332 }
1333 return((int) *ctxt->input->cur);
1334 }
1335 }
1336 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001337 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001338 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001339 * XML constructs only use < 128 chars
1340 */
1341 *len = 1;
1342 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001343 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001344 ctxt->nbChars++;
1345 ctxt->input->cur++;
1346 }
1347 return(0xA);
1348 }
1349 return((int) *ctxt->input->cur);
1350encoding_error:
1351 /*
Daniel Veillardd2ff0392002-11-22 12:28:38 +00001352 * An encoding problem may arise from a truncated input buffer
1353 * splitting a character in the middle. In that case do not raise
1354 * an error but return 0 to endicate an end of stream problem
1355 */
1356 if (ctxt->input->end - ctxt->input->cur < 4) {
1357 *len = 0;
1358 return(0);
1359 }
1360
1361 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001362 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001363 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001364 * declaration header. Report the error and switch the encoding
1365 * to ISO-Latin-1 (if you don't like this policy, just declare the
1366 * encoding !)
1367 */
1368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1369 ctxt->sax->error(ctxt->userData,
1370 "Input is not proper UTF-8, indicate encoding !\n");
1371 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001372 ctxt->input->cur[0], ctxt->input->cur[1],
1373 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001374 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001375 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001376 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1377
1378 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1379 *len = 1;
1380 return((int) *ctxt->input->cur);
1381}
1382
1383/**
1384 * xmlStringCurrentChar:
1385 * @ctxt: the XML parser context
1386 * @cur: pointer to the beginning of the char
1387 * @len: pointer to the length of the char read
1388 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001389 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001390 * bytes in the input buffer.
1391 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001392 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001393 */
1394
1395int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001396xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1397{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001398 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001399 /*
1400 * We are supposed to handle UTF8, check it's valid
1401 * From rfc2044: encoding of the Unicode values on UTF-8:
1402 *
1403 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1404 * 0000 0000-0000 007F 0xxxxxxx
1405 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1406 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1407 *
1408 * Check for the 0x110000 limit too
1409 */
1410 unsigned char c;
1411 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001412
Daniel Veillardd8224e02002-01-13 15:43:22 +00001413 c = *cur;
1414 if (c & 0x80) {
1415 if ((cur[1] & 0xc0) != 0x80)
1416 goto encoding_error;
1417 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001418
Daniel Veillardd8224e02002-01-13 15:43:22 +00001419 if ((cur[2] & 0xc0) != 0x80)
1420 goto encoding_error;
1421 if ((c & 0xf0) == 0xf0) {
1422 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1423 goto encoding_error;
1424 /* 4-byte code */
1425 *len = 4;
1426 val = (cur[0] & 0x7) << 18;
1427 val |= (cur[1] & 0x3f) << 12;
1428 val |= (cur[2] & 0x3f) << 6;
1429 val |= cur[3] & 0x3f;
1430 } else {
1431 /* 3-byte code */
1432 *len = 3;
1433 val = (cur[0] & 0xf) << 12;
1434 val |= (cur[1] & 0x3f) << 6;
1435 val |= cur[2] & 0x3f;
1436 }
1437 } else {
1438 /* 2-byte code */
1439 *len = 2;
1440 val = (cur[0] & 0x1f) << 6;
1441 val |= cur[1] & 0x3f;
1442 }
1443 if (!IS_CHAR(val)) {
1444 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1445 (ctxt->sax->error != NULL))
1446 ctxt->sax->error(ctxt->userData,
1447 "Char 0x%X out of allowed range\n",
1448 val);
Daniel Veillardd076a202002-11-20 13:28:31 +00001449 if (ctxt != NULL) {
1450 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1451 ctxt->wellFormed = 0;
1452 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1453 }
Daniel Veillardd8224e02002-01-13 15:43:22 +00001454 }
1455 return (val);
1456 } else {
1457 /* 1-byte code */
1458 *len = 1;
1459 return ((int) *cur);
1460 }
Owen Taylor3473f882001-02-23 17:55:21 +00001461 }
1462 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001463 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001465 * XML constructs only use < 128 chars
1466 */
1467 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001468 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001469encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001470
Owen Taylor3473f882001-02-23 17:55:21 +00001471 /*
1472 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001473 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001474 * declaration header. Report the error and switch the encoding
1475 * to ISO-Latin-1 (if you don't like this policy, just declare the
1476 * encoding !)
1477 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001478 if (ctxt != NULL) {
1479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1480 ctxt->sax->error(ctxt->userData,
1481 "Input is not proper UTF-8, indicate encoding !\n");
1482 ctxt->sax->error(ctxt->userData,
1483 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1484 ctxt->input->cur[0], ctxt->input->cur[1],
1485 ctxt->input->cur[2], ctxt->input->cur[3]);
1486 }
1487 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001488 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001489 }
Owen Taylor3473f882001-02-23 17:55:21 +00001490
1491 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001492 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001493}
1494
1495/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001496 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001497 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001498 * @val: the char value
1499 *
1500 * append the char value in the array
1501 *
1502 * Returns the number of xmlChar written
1503 */
Owen Taylor3473f882001-02-23 17:55:21 +00001504int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001505xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001506 /*
1507 * We are supposed to handle UTF8, check it's valid
1508 * From rfc2044: encoding of the Unicode values on UTF-8:
1509 *
1510 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1511 * 0000 0000-0000 007F 0xxxxxxx
1512 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1513 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1514 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001515 if (val >= 0x80) {
1516 xmlChar *savedout = out;
1517 int bits;
1518 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1519 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1520 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1521 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001522 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001523 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001524 val);
1525 return(0);
1526 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001527 for ( ; bits >= 0; bits-= 6)
1528 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1529 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 }
1531 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001532 return 1;
1533}
1534
1535/**
1536 * xmlCopyChar:
1537 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001539 * @val: the char value
1540 *
1541 * append the char value in the array
1542 *
1543 * Returns the number of xmlChar written
1544 */
1545
1546int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001547xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001548 /* the len parameter is ignored */
1549 if (val >= 0x80) {
1550 return(xmlCopyCharMultiByte (out, val));
1551 }
1552 *out = (xmlChar) val;
1553 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001554}
1555
1556/************************************************************************
1557 * *
1558 * Commodity functions to switch encodings *
1559 * *
1560 ************************************************************************/
1561
1562/**
1563 * xmlSwitchEncoding:
1564 * @ctxt: the parser context
1565 * @enc: the encoding value (number)
1566 *
1567 * change the input functions when discovering the character encoding
1568 * of a given entity.
1569 *
1570 * Returns 0 in case of success, -1 otherwise
1571 */
1572int
1573xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1574{
1575 xmlCharEncodingHandlerPtr handler;
1576
1577 switch (enc) {
1578 case XML_CHAR_ENCODING_ERROR:
1579 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1581 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1582 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001583 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001584 break;
1585 case XML_CHAR_ENCODING_NONE:
1586 /* let's assume it's UTF-8 without the XML decl */
1587 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588 return(0);
1589 case XML_CHAR_ENCODING_UTF8:
1590 /* default encoding, no conversion should be needed */
1591 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001592
1593 /*
1594 * Errata on XML-1.0 June 20 2001
1595 * Specific handling of the Byte Order Mark for
1596 * UTF-8
1597 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001598 if ((ctxt->input != NULL) &&
1599 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001600 (ctxt->input->cur[1] == 0xBB) &&
1601 (ctxt->input->cur[2] == 0xBF)) {
1602 ctxt->input->cur += 3;
1603 }
Owen Taylor3473f882001-02-23 17:55:21 +00001604 return(0);
1605 default:
1606 break;
1607 }
1608 handler = xmlGetCharEncodingHandler(enc);
1609 if (handler == NULL) {
1610 /*
1611 * Default handlers.
1612 */
1613 switch (enc) {
1614 case XML_CHAR_ENCODING_ERROR:
1615 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1618 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001619 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001620 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1621 break;
1622 case XML_CHAR_ENCODING_NONE:
1623 /* let's assume it's UTF-8 without the XML decl */
1624 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1625 return(0);
1626 case XML_CHAR_ENCODING_UTF8:
1627 case XML_CHAR_ENCODING_ASCII:
1628 /* default encoding, no conversion should be needed */
1629 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1630 return(0);
1631 case XML_CHAR_ENCODING_UTF16LE:
1632 break;
1633 case XML_CHAR_ENCODING_UTF16BE:
1634 break;
1635 case XML_CHAR_ENCODING_UCS4LE:
1636 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638 ctxt->sax->error(ctxt->userData,
1639 "char encoding USC4 little endian not supported\n");
1640 break;
1641 case XML_CHAR_ENCODING_UCS4BE:
1642 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "char encoding USC4 big endian not supported\n");
1646 break;
1647 case XML_CHAR_ENCODING_EBCDIC:
1648 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1650 ctxt->sax->error(ctxt->userData,
1651 "char encoding EBCDIC not supported\n");
1652 break;
1653 case XML_CHAR_ENCODING_UCS4_2143:
1654 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "char encoding UCS4 2143 not supported\n");
1658 break;
1659 case XML_CHAR_ENCODING_UCS4_3412:
1660 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1662 ctxt->sax->error(ctxt->userData,
1663 "char encoding UCS4 3412 not supported\n");
1664 break;
1665 case XML_CHAR_ENCODING_UCS2:
1666 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1668 ctxt->sax->error(ctxt->userData,
1669 "char encoding UCS2 not supported\n");
1670 break;
1671 case XML_CHAR_ENCODING_8859_1:
1672 case XML_CHAR_ENCODING_8859_2:
1673 case XML_CHAR_ENCODING_8859_3:
1674 case XML_CHAR_ENCODING_8859_4:
1675 case XML_CHAR_ENCODING_8859_5:
1676 case XML_CHAR_ENCODING_8859_6:
1677 case XML_CHAR_ENCODING_8859_7:
1678 case XML_CHAR_ENCODING_8859_8:
1679 case XML_CHAR_ENCODING_8859_9:
1680 /*
1681 * We used to keep the internal content in the
1682 * document encoding however this turns being unmaintainable
1683 * So xmlGetCharEncodingHandler() will return non-null
1684 * values for this now.
1685 */
1686 if ((ctxt->inputNr == 1) &&
1687 (ctxt->encoding == NULL) &&
1688 (ctxt->input->encoding != NULL)) {
1689 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1690 }
1691 ctxt->charset = enc;
1692 return(0);
1693 case XML_CHAR_ENCODING_2022_JP:
1694 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696 ctxt->sax->error(ctxt->userData,
1697 "char encoding ISO-2022-JPnot supported\n");
1698 break;
1699 case XML_CHAR_ENCODING_SHIFT_JIS:
1700 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702 ctxt->sax->error(ctxt->userData,
1703 "char encoding Shift_JIS not supported\n");
1704 break;
1705 case XML_CHAR_ENCODING_EUC_JP:
1706 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1708 ctxt->sax->error(ctxt->userData,
1709 "char encoding EUC-JPnot supported\n");
1710 break;
1711 }
1712 }
1713 if (handler == NULL)
1714 return(-1);
1715 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1716 return(xmlSwitchToEncoding(ctxt, handler));
1717}
1718
1719/**
1720 * xmlSwitchToEncoding:
1721 * @ctxt: the parser context
1722 * @handler: the encoding handler
1723 *
1724 * change the input functions when discovering the character encoding
1725 * of a given entity.
1726 *
1727 * Returns 0 in case of success, -1 otherwise
1728 */
1729int
1730xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1731{
1732 int nbchars;
1733
1734 if (handler != NULL) {
1735 if (ctxt->input != NULL) {
1736 if (ctxt->input->buf != NULL) {
1737 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001738 /*
1739 * Check in case the auto encoding detetection triggered
1740 * in already.
1741 */
Owen Taylor3473f882001-02-23 17:55:21 +00001742 if (ctxt->input->buf->encoder == handler)
1743 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001744
1745 /*
1746 * "UTF-16" can be used for both LE and BE
1747 */
1748 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1749 BAD_CAST "UTF-16", 6)) &&
1750 (!xmlStrncmp(BAD_CAST handler->name,
1751 BAD_CAST "UTF-16", 6))) {
1752 return(0);
1753 }
1754
Owen Taylor3473f882001-02-23 17:55:21 +00001755 /*
1756 * Note: this is a bit dangerous, but that's what it
1757 * takes to use nearly compatible signature for different
1758 * encodings.
1759 */
1760 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1761 ctxt->input->buf->encoder = handler;
1762 return(0);
1763 }
1764 ctxt->input->buf->encoder = handler;
1765
1766 /*
1767 * Is there already some content down the pipe to convert ?
1768 */
1769 if ((ctxt->input->buf->buffer != NULL) &&
1770 (ctxt->input->buf->buffer->use > 0)) {
1771 int processed;
1772
1773 /*
1774 * Specific handling of the Byte Order Mark for
1775 * UTF-16
1776 */
1777 if ((handler->name != NULL) &&
1778 (!strcmp(handler->name, "UTF-16LE")) &&
1779 (ctxt->input->cur[0] == 0xFF) &&
1780 (ctxt->input->cur[1] == 0xFE)) {
1781 ctxt->input->cur += 2;
1782 }
1783 if ((handler->name != NULL) &&
1784 (!strcmp(handler->name, "UTF-16BE")) &&
1785 (ctxt->input->cur[0] == 0xFE) &&
1786 (ctxt->input->cur[1] == 0xFF)) {
1787 ctxt->input->cur += 2;
1788 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001789 /*
1790 * Errata on XML-1.0 June 20 2001
1791 * Specific handling of the Byte Order Mark for
1792 * UTF-8
1793 */
1794 if ((handler->name != NULL) &&
1795 (!strcmp(handler->name, "UTF-8")) &&
1796 (ctxt->input->cur[0] == 0xEF) &&
1797 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001798 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001799 ctxt->input->cur += 3;
1800 }
Owen Taylor3473f882001-02-23 17:55:21 +00001801
1802 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001803 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001804 * Move it as the raw buffer and create a new input buffer
1805 */
1806 processed = ctxt->input->cur - ctxt->input->base;
1807 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1808 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1809 ctxt->input->buf->buffer = xmlBufferCreate();
1810
1811 if (ctxt->html) {
1812 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001813 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001814 */
1815 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1816 ctxt->input->buf->buffer,
1817 ctxt->input->buf->raw);
1818 } else {
1819 /*
1820 * convert just enough to get
1821 * '<?xml version="1.0" encoding="xxx"?>'
1822 * parsed with the autodetected encoding
1823 * into the parser reading buffer.
1824 */
1825 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1826 ctxt->input->buf->buffer,
1827 ctxt->input->buf->raw);
1828 }
1829 if (nbchars < 0) {
1830 xmlGenericError(xmlGenericErrorContext,
1831 "xmlSwitchToEncoding: encoder error\n");
1832 return(-1);
1833 }
1834 ctxt->input->base =
1835 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001836 ctxt->input->end =
1837 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001838
1839 }
1840 return(0);
1841 } else {
1842 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1843 /*
1844 * When parsing a static memory array one must know the
1845 * size to be able to convert the buffer.
1846 */
1847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1848 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001849 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001850 return(-1);
1851 } else {
1852 int processed;
1853
1854 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001855 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001856 * Move it as the raw buffer and create a new input buffer
1857 */
1858 processed = ctxt->input->cur - ctxt->input->base;
1859
1860 ctxt->input->buf->raw = xmlBufferCreate();
1861 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1862 ctxt->input->length - processed);
1863 ctxt->input->buf->buffer = xmlBufferCreate();
1864
1865 /*
1866 * convert as much as possible of the raw input
1867 * to the parser reading buffer.
1868 */
1869 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1870 ctxt->input->buf->buffer,
1871 ctxt->input->buf->raw);
1872 if (nbchars < 0) {
1873 xmlGenericError(xmlGenericErrorContext,
1874 "xmlSwitchToEncoding: encoder error\n");
1875 return(-1);
1876 }
1877
1878 /*
1879 * Conversion succeeded, get rid of the old buffer
1880 */
1881 if ((ctxt->input->free != NULL) &&
1882 (ctxt->input->base != NULL))
1883 ctxt->input->free((xmlChar *) ctxt->input->base);
1884 ctxt->input->base =
1885 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001886 ctxt->input->end =
1887 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001888 }
1889 }
1890 } else {
1891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1892 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001893 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001894 return(-1);
1895 }
1896 /*
1897 * The parsing is now done in UTF8 natively
1898 */
1899 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1900 } else
1901 return(-1);
1902 return(0);
1903
1904}
1905
1906/************************************************************************
1907 * *
1908 * Commodity functions to handle entities processing *
1909 * *
1910 ************************************************************************/
1911
1912/**
1913 * xmlFreeInputStream:
1914 * @input: an xmlParserInputPtr
1915 *
1916 * Free up an input stream.
1917 */
1918void
1919xmlFreeInputStream(xmlParserInputPtr input) {
1920 if (input == NULL) return;
1921
1922 if (input->filename != NULL) xmlFree((char *) input->filename);
1923 if (input->directory != NULL) xmlFree((char *) input->directory);
1924 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1925 if (input->version != NULL) xmlFree((char *) input->version);
1926 if ((input->free != NULL) && (input->base != NULL))
1927 input->free((xmlChar *) input->base);
1928 if (input->buf != NULL)
1929 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001930 xmlFree(input);
1931}
1932
1933/**
1934 * xmlNewInputStream:
1935 * @ctxt: an XML parser context
1936 *
1937 * Create a new input stream structure
1938 * Returns the new input stream or NULL
1939 */
1940xmlParserInputPtr
1941xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1942 xmlParserInputPtr input;
1943
1944 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1945 if (input == NULL) {
1946 if (ctxt != NULL) {
1947 ctxt->errNo = XML_ERR_NO_MEMORY;
1948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1949 ctxt->sax->error(ctxt->userData,
1950 "malloc: couldn't allocate a new input stream\n");
1951 ctxt->errNo = XML_ERR_NO_MEMORY;
1952 }
1953 return(NULL);
1954 }
1955 memset(input, 0, sizeof(xmlParserInput));
1956 input->line = 1;
1957 input->col = 1;
1958 input->standalone = -1;
1959 return(input);
1960}
1961
1962/**
1963 * xmlNewIOInputStream:
1964 * @ctxt: an XML parser context
1965 * @input: an I/O Input
1966 * @enc: the charset encoding if known
1967 *
1968 * Create a new input stream structure encapsulating the @input into
1969 * a stream suitable for the parser.
1970 *
1971 * Returns the new input stream or NULL
1972 */
1973xmlParserInputPtr
1974xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1975 xmlCharEncoding enc) {
1976 xmlParserInputPtr inputStream;
1977
1978 if (xmlParserDebugEntities)
1979 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1980 inputStream = xmlNewInputStream(ctxt);
1981 if (inputStream == NULL) {
1982 return(NULL);
1983 }
1984 inputStream->filename = NULL;
1985 inputStream->buf = input;
1986 inputStream->base = inputStream->buf->buffer->content;
1987 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001988 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001989 if (enc != XML_CHAR_ENCODING_NONE) {
1990 xmlSwitchEncoding(ctxt, enc);
1991 }
1992
1993 return(inputStream);
1994}
1995
1996/**
1997 * xmlNewEntityInputStream:
1998 * @ctxt: an XML parser context
1999 * @entity: an Entity pointer
2000 *
2001 * Create a new input stream based on an xmlEntityPtr
2002 *
2003 * Returns the new input stream or NULL
2004 */
2005xmlParserInputPtr
2006xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2007 xmlParserInputPtr input;
2008
2009 if (entity == NULL) {
2010 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2012 ctxt->sax->error(ctxt->userData,
2013 "internal: xmlNewEntityInputStream entity = NULL\n");
2014 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2015 return(NULL);
2016 }
2017 if (xmlParserDebugEntities)
2018 xmlGenericError(xmlGenericErrorContext,
2019 "new input from entity: %s\n", entity->name);
2020 if (entity->content == NULL) {
2021 switch (entity->etype) {
2022 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2023 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2025 ctxt->sax->error(ctxt->userData,
2026 "xmlNewEntityInputStream unparsed entity !\n");
2027 break;
2028 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2029 case XML_EXTERNAL_PARAMETER_ENTITY:
2030 return(xmlLoadExternalEntity((char *) entity->URI,
2031 (char *) entity->ExternalID, ctxt));
2032 case XML_INTERNAL_GENERAL_ENTITY:
2033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2034 ctxt->sax->error(ctxt->userData,
2035 "Internal entity %s without content !\n", entity->name);
2036 break;
2037 case XML_INTERNAL_PARAMETER_ENTITY:
2038 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2040 ctxt->sax->error(ctxt->userData,
2041 "Internal parameter entity %s without content !\n", entity->name);
2042 break;
2043 case XML_INTERNAL_PREDEFINED_ENTITY:
2044 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2046 ctxt->sax->error(ctxt->userData,
2047 "Predefined entity %s without content !\n", entity->name);
2048 break;
2049 }
2050 return(NULL);
2051 }
2052 input = xmlNewInputStream(ctxt);
2053 if (input == NULL) {
2054 return(NULL);
2055 }
2056 input->filename = (char *) entity->URI;
2057 input->base = entity->content;
2058 input->cur = entity->content;
2059 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002060 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002061 return(input);
2062}
2063
2064/**
2065 * xmlNewStringInputStream:
2066 * @ctxt: an XML parser context
2067 * @buffer: an memory buffer
2068 *
2069 * Create a new input stream based on a memory buffer.
2070 * Returns the new input stream
2071 */
2072xmlParserInputPtr
2073xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2074 xmlParserInputPtr input;
2075
2076 if (buffer == NULL) {
2077 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2079 ctxt->sax->error(ctxt->userData,
2080 "internal: xmlNewStringInputStream string = NULL\n");
2081 return(NULL);
2082 }
2083 if (xmlParserDebugEntities)
2084 xmlGenericError(xmlGenericErrorContext,
2085 "new fixed input: %.30s\n", buffer);
2086 input = xmlNewInputStream(ctxt);
2087 if (input == NULL) {
2088 return(NULL);
2089 }
2090 input->base = buffer;
2091 input->cur = buffer;
2092 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002093 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002094 return(input);
2095}
2096
2097/**
2098 * xmlNewInputFromFile:
2099 * @ctxt: an XML parser context
2100 * @filename: the filename to use as entity
2101 *
2102 * Create a new input stream based on a file.
2103 *
2104 * Returns the new input stream or NULL in case of error
2105 */
2106xmlParserInputPtr
2107xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2108 xmlParserInputBufferPtr buf;
2109 xmlParserInputPtr inputStream;
2110 char *directory = NULL;
2111 xmlChar *URI = NULL;
2112
2113 if (xmlParserDebugEntities)
2114 xmlGenericError(xmlGenericErrorContext,
2115 "new input from file: %s\n", filename);
2116 if (ctxt == NULL) return(NULL);
2117 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2118 if (buf == NULL)
2119 return(NULL);
2120
2121 URI = xmlStrdup((xmlChar *) filename);
2122 directory = xmlParserGetDirectory((const char *) URI);
2123
2124 inputStream = xmlNewInputStream(ctxt);
2125 if (inputStream == NULL) {
2126 if (directory != NULL) xmlFree((char *) directory);
2127 if (URI != NULL) xmlFree((char *) URI);
2128 return(NULL);
2129 }
2130
2131 inputStream->filename = (const char *) URI;
2132 inputStream->directory = directory;
2133 inputStream->buf = buf;
2134
2135 inputStream->base = inputStream->buf->buffer->content;
2136 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002137 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002138 if ((ctxt->directory == NULL) && (directory != NULL))
2139 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2140 return(inputStream);
2141}
2142
2143/************************************************************************
2144 * *
2145 * Commodity functions to handle parser contexts *
2146 * *
2147 ************************************************************************/
2148
2149/**
2150 * xmlInitParserCtxt:
2151 * @ctxt: an XML parser context
2152 *
2153 * Initialize a parser context
2154 */
2155
2156void
2157xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2158{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002159 if(ctxt==NULL) {
2160 xmlGenericError(xmlGenericErrorContext,
2161 "xmlInitParserCtxt: NULL context given\n");
2162 return;
2163 }
2164
Owen Taylor3473f882001-02-23 17:55:21 +00002165 xmlDefaultSAXHandlerInit();
2166
William M. Brack8b2c7f12002-11-22 05:07:29 +00002167 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2168 if (ctxt->sax == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00002169 xmlGenericError(xmlGenericErrorContext,
2170 "xmlInitParserCtxt: out of memory\n");
2171 }
2172 else
William M. Brack8b2c7f12002-11-22 05:07:29 +00002173 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00002174
2175 /* Allocate the Input stack */
2176 ctxt->inputTab = (xmlParserInputPtr *)
2177 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2178 if (ctxt->inputTab == NULL) {
2179 xmlGenericError(xmlGenericErrorContext,
2180 "xmlInitParserCtxt: out of memory\n");
2181 ctxt->inputNr = 0;
2182 ctxt->inputMax = 0;
2183 ctxt->input = NULL;
2184 return;
2185 }
2186 ctxt->inputNr = 0;
2187 ctxt->inputMax = 5;
2188 ctxt->input = NULL;
2189
2190 ctxt->version = NULL;
2191 ctxt->encoding = NULL;
2192 ctxt->standalone = -1;
2193 ctxt->hasExternalSubset = 0;
2194 ctxt->hasPErefs = 0;
2195 ctxt->html = 0;
2196 ctxt->external = 0;
2197 ctxt->instate = XML_PARSER_START;
2198 ctxt->token = 0;
2199 ctxt->directory = NULL;
2200
2201 /* Allocate the Node stack */
2202 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2203 if (ctxt->nodeTab == NULL) {
2204 xmlGenericError(xmlGenericErrorContext,
2205 "xmlInitParserCtxt: out of memory\n");
2206 ctxt->nodeNr = 0;
2207 ctxt->nodeMax = 0;
2208 ctxt->node = NULL;
2209 ctxt->inputNr = 0;
2210 ctxt->inputMax = 0;
2211 ctxt->input = NULL;
2212 return;
2213 }
2214 ctxt->nodeNr = 0;
2215 ctxt->nodeMax = 10;
2216 ctxt->node = NULL;
2217
2218 /* Allocate the Name stack */
2219 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2220 if (ctxt->nameTab == NULL) {
2221 xmlGenericError(xmlGenericErrorContext,
2222 "xmlInitParserCtxt: out of memory\n");
2223 ctxt->nodeNr = 0;
2224 ctxt->nodeMax = 0;
2225 ctxt->node = NULL;
2226 ctxt->inputNr = 0;
2227 ctxt->inputMax = 0;
2228 ctxt->input = NULL;
2229 ctxt->nameNr = 0;
2230 ctxt->nameMax = 0;
2231 ctxt->name = NULL;
2232 return;
2233 }
2234 ctxt->nameNr = 0;
2235 ctxt->nameMax = 10;
2236 ctxt->name = NULL;
2237
2238 /* Allocate the space stack */
2239 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2240 if (ctxt->spaceTab == NULL) {
2241 xmlGenericError(xmlGenericErrorContext,
2242 "xmlInitParserCtxt: out of memory\n");
2243 ctxt->nodeNr = 0;
2244 ctxt->nodeMax = 0;
2245 ctxt->node = NULL;
2246 ctxt->inputNr = 0;
2247 ctxt->inputMax = 0;
2248 ctxt->input = NULL;
2249 ctxt->nameNr = 0;
2250 ctxt->nameMax = 0;
2251 ctxt->name = NULL;
2252 ctxt->spaceNr = 0;
2253 ctxt->spaceMax = 0;
2254 ctxt->space = NULL;
2255 return;
2256 }
2257 ctxt->spaceNr = 1;
2258 ctxt->spaceMax = 10;
2259 ctxt->spaceTab[0] = -1;
2260 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002261 ctxt->userData = ctxt;
2262 ctxt->myDoc = NULL;
2263 ctxt->wellFormed = 1;
2264 ctxt->valid = 1;
2265 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2266 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2267 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002268 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002269 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002270 if (ctxt->keepBlanks == 0)
William M. Brack8b2c7f12002-11-22 05:07:29 +00002271 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
Daniel Veillard16698282001-09-14 10:29:27 +00002272
Owen Taylor3473f882001-02-23 17:55:21 +00002273 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002274 ctxt->vctxt.error = xmlParserValidityError;
2275 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002277 if (xmlGetWarningsDefaultValue == 0)
2278 ctxt->vctxt.warning = NULL;
2279 else
2280 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002281 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002282 }
2283 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2284 ctxt->record_info = 0;
2285 ctxt->nbChars = 0;
2286 ctxt->checkIndex = 0;
2287 ctxt->inSubset = 0;
2288 ctxt->errNo = XML_ERR_OK;
2289 ctxt->depth = 0;
2290 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002291 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002292 xmlInitNodeInfoSeq(&ctxt->node_seq);
2293}
2294
2295/**
2296 * xmlFreeParserCtxt:
2297 * @ctxt: an XML parser context
2298 *
2299 * Free all the memory used by a parser context. However the parsed
2300 * document in ctxt->myDoc is not freed.
2301 */
2302
2303void
2304xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2305{
2306 xmlParserInputPtr input;
2307 xmlChar *oldname;
2308
2309 if (ctxt == NULL) return;
2310
2311 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2312 xmlFreeInputStream(input);
2313 }
2314 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2315 xmlFree(oldname);
2316 }
2317 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2318 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2319 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2320 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2321 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2322 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2323 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2324 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2325 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002326 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2327 xmlFree(ctxt->sax);
2328 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002329 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002330#ifdef LIBXML_CATALOG_ENABLED
2331 if (ctxt->catalogs != NULL)
2332 xmlCatalogFreeLocal(ctxt->catalogs);
2333#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002334 xmlFree(ctxt);
2335}
2336
2337/**
2338 * xmlNewParserCtxt:
2339 *
2340 * Allocate and initialize a new parser context.
2341 *
2342 * Returns the xmlParserCtxtPtr or NULL
2343 */
2344
2345xmlParserCtxtPtr
2346xmlNewParserCtxt()
2347{
2348 xmlParserCtxtPtr ctxt;
2349
2350 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2351 if (ctxt == NULL) {
2352 xmlGenericError(xmlGenericErrorContext,
2353 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002354 xmlGenericError(xmlGenericErrorContext, "malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002355 return(NULL);
2356 }
2357 memset(ctxt, 0, sizeof(xmlParserCtxt));
2358 xmlInitParserCtxt(ctxt);
2359 return(ctxt);
2360}
2361
2362/************************************************************************
2363 * *
2364 * Handling of node informations *
2365 * *
2366 ************************************************************************/
2367
2368/**
2369 * xmlClearParserCtxt:
2370 * @ctxt: an XML parser context
2371 *
2372 * Clear (release owned resources) and reinitialize a parser context
2373 */
2374
2375void
2376xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2377{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002378 if (ctxt==NULL)
2379 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002380 xmlClearNodeInfoSeq(&ctxt->node_seq);
2381 xmlInitParserCtxt(ctxt);
2382}
2383
2384/**
2385 * xmlParserFindNodeInfo:
2386 * @ctxt: an XML parser context
2387 * @node: an XML node within the tree
2388 *
2389 * Find the parser node info struct for a given node
2390 *
2391 * Returns an xmlParserNodeInfo block pointer or NULL
2392 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002393const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2394 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002395{
2396 unsigned long pos;
2397
2398 /* Find position where node should be at */
2399 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002400 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002401 return &ctx->node_seq.buffer[pos];
2402 else
2403 return NULL;
2404}
2405
2406
2407/**
2408 * xmlInitNodeInfoSeq:
2409 * @seq: a node info sequence pointer
2410 *
2411 * -- Initialize (set to initial state) node info sequence
2412 */
2413void
2414xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2415{
2416 seq->length = 0;
2417 seq->maximum = 0;
2418 seq->buffer = NULL;
2419}
2420
2421/**
2422 * xmlClearNodeInfoSeq:
2423 * @seq: a node info sequence pointer
2424 *
2425 * -- Clear (release memory and reinitialize) node
2426 * info sequence
2427 */
2428void
2429xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2430{
2431 if ( seq->buffer != NULL )
2432 xmlFree(seq->buffer);
2433 xmlInitNodeInfoSeq(seq);
2434}
2435
2436
2437/**
2438 * xmlParserFindNodeInfoIndex:
2439 * @seq: a node info sequence pointer
2440 * @node: an XML node pointer
2441 *
2442 *
2443 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2444 * the given node is or should be at in a sorted sequence
2445 *
2446 * Returns a long indicating the position of the record
2447 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002448unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2449 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002450{
2451 unsigned long upper, lower, middle;
2452 int found = 0;
2453
2454 /* Do a binary search for the key */
2455 lower = 1;
2456 upper = seq->length;
2457 middle = 0;
2458 while ( lower <= upper && !found) {
2459 middle = lower + (upper - lower) / 2;
2460 if ( node == seq->buffer[middle - 1].node )
2461 found = 1;
2462 else if ( node < seq->buffer[middle - 1].node )
2463 upper = middle - 1;
2464 else
2465 lower = middle + 1;
2466 }
2467
2468 /* Return position */
2469 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2470 return middle;
2471 else
2472 return middle - 1;
2473}
2474
2475
2476/**
2477 * xmlParserAddNodeInfo:
2478 * @ctxt: an XML parser context
2479 * @info: a node info sequence pointer
2480 *
2481 * Insert node info record into the sorted sequence
2482 */
2483void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002484xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002485 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002486{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002487 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002488
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002489 /* Find pos and check to see if node is already in the sequence */
2490 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2491 info->node);
2492 if (pos < ctxt->node_seq.length
2493 && ctxt->node_seq.buffer[pos].node == info->node) {
2494 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002495 }
2496
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002497 /* Otherwise, we need to add new node to buffer */
2498 else {
2499 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2500 xmlParserNodeInfo *tmp_buffer;
2501 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002502
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002503 if (ctxt->node_seq.maximum == 0)
2504 ctxt->node_seq.maximum = 2;
2505 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2506 (2 * ctxt->node_seq.maximum));
2507
2508 if (ctxt->node_seq.buffer == NULL)
2509 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2510 else
2511 tmp_buffer =
2512 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2513 byte_size);
2514
2515 if (tmp_buffer == NULL) {
2516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2517 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2518 ctxt->errNo = XML_ERR_NO_MEMORY;
2519 return;
2520 }
2521 ctxt->node_seq.buffer = tmp_buffer;
2522 ctxt->node_seq.maximum *= 2;
2523 }
2524
2525 /* If position is not at end, move elements out of the way */
2526 if (pos != ctxt->node_seq.length) {
2527 unsigned long i;
2528
2529 for (i = ctxt->node_seq.length; i > pos; i--)
2530 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2531 }
2532
2533 /* Copy element and increase length */
2534 ctxt->node_seq.buffer[pos] = *info;
2535 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 }
Owen Taylor3473f882001-02-23 17:55:21 +00002537}
2538
2539/************************************************************************
2540 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002541 * Defaults settings *
2542 * *
2543 ************************************************************************/
2544/**
2545 * xmlPedanticParserDefault:
2546 * @val: int 0 or 1
2547 *
2548 * Set and return the previous value for enabling pedantic warnings.
2549 *
2550 * Returns the last value for 0 for no substitution, 1 for substitution.
2551 */
2552
2553int
2554xmlPedanticParserDefault(int val) {
2555 int old = xmlPedanticParserDefaultValue;
2556
2557 xmlPedanticParserDefaultValue = val;
2558 return(old);
2559}
2560
2561/**
2562 * xmlLineNumbersDefault:
2563 * @val: int 0 or 1
2564 *
2565 * Set and return the previous value for enabling line numbers in elements
2566 * contents. This may break on old application and is turned off by default.
2567 *
2568 * Returns the last value for 0 for no substitution, 1 for substitution.
2569 */
2570
2571int
2572xmlLineNumbersDefault(int val) {
2573 int old = xmlLineNumbersDefaultValue;
2574
2575 xmlLineNumbersDefaultValue = val;
2576 return(old);
2577}
2578
2579/**
2580 * xmlSubstituteEntitiesDefault:
2581 * @val: int 0 or 1
2582 *
2583 * Set and return the previous value for default entity support.
2584 * Initially the parser always keep entity references instead of substituting
2585 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002586 * default parser behavior
2587 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002588 * file basis.
2589 *
2590 * Returns the last value for 0 for no substitution, 1 for substitution.
2591 */
2592
2593int
2594xmlSubstituteEntitiesDefault(int val) {
2595 int old = xmlSubstituteEntitiesDefaultValue;
2596
2597 xmlSubstituteEntitiesDefaultValue = val;
2598 return(old);
2599}
2600
2601/**
2602 * xmlKeepBlanksDefault:
2603 * @val: int 0 or 1
2604 *
2605 * Set and return the previous value for default blanks text nodes support.
2606 * The 1.x version of the parser used an heuristic to try to detect
2607 * ignorable white spaces. As a result the SAX callback was generating
2608 * ignorableWhitespace() callbacks instead of characters() one, and when
2609 * using the DOM output text nodes containing those blanks were not generated.
2610 * The 2.x and later version will switch to the XML standard way and
2611 * ignorableWhitespace() are only generated when running the parser in
2612 * validating mode and when the current element doesn't allow CDATA or
2613 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002614 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002615 * on 1.X libs and to switch back to the old mode for compatibility when
2616 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2617 * by using xmlIsBlankNode() commodity function to detect the "empty"
2618 * nodes generated.
2619 * This value also affect autogeneration of indentation when saving code
2620 * if blanks sections are kept, indentation is not generated.
2621 *
2622 * Returns the last value for 0 for no substitution, 1 for substitution.
2623 */
2624
2625int
2626xmlKeepBlanksDefault(int val) {
2627 int old = xmlKeepBlanksDefaultValue;
2628
2629 xmlKeepBlanksDefaultValue = val;
2630 xmlIndentTreeOutput = !val;
2631 return(old);
2632}
2633
2634/************************************************************************
2635 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002636 * Deprecated functions kept for compatibility *
2637 * *
2638 ************************************************************************/
2639
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002640/**
2641 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002642 * @lang: pointer to the string value
2643 *
2644 * Checks that the value conforms to the LanguageID production:
2645 *
2646 * NOTE: this is somewhat deprecated, those productions were removed from
2647 * the XML Second edition.
2648 *
2649 * [33] LanguageID ::= Langcode ('-' Subcode)*
2650 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2651 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2652 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2653 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2654 * [38] Subcode ::= ([a-z] | [A-Z])+
2655 *
2656 * Returns 1 if correct 0 otherwise
2657 **/
2658int
2659xmlCheckLanguageID(const xmlChar *lang) {
2660 const xmlChar *cur = lang;
2661
2662 if (cur == NULL)
2663 return(0);
2664 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2665 ((cur[0] == 'I') && (cur[1] == '-'))) {
2666 /*
2667 * IANA code
2668 */
2669 cur += 2;
2670 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2671 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2672 cur++;
2673 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2674 ((cur[0] == 'X') && (cur[1] == '-'))) {
2675 /*
2676 * User code
2677 */
2678 cur += 2;
2679 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2680 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2681 cur++;
2682 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2683 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2684 /*
2685 * ISO639
2686 */
2687 cur++;
2688 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2689 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2690 cur++;
2691 else
2692 return(0);
2693 } else
2694 return(0);
2695 while (cur[0] != 0) { /* non input consuming */
2696 if (cur[0] != '-')
2697 return(0);
2698 cur++;
2699 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2700 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2701 cur++;
2702 else
2703 return(0);
2704 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2705 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2706 cur++;
2707 }
2708 return(1);
2709}
2710
2711/**
2712 * xmlDecodeEntities:
2713 * @ctxt: the parser context
2714 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2715 * @len: the len to decode (in bytes !), -1 for no size limit
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2719 *
2720 * This function is deprecated, we now always process entities content
2721 * through xmlStringDecodeEntities
2722 *
2723 * TODO: remove it in next major release.
2724 *
2725 * [67] Reference ::= EntityRef | CharRef
2726 *
2727 * [69] PEReference ::= '%' Name ';'
2728 *
2729 * Returns A newly allocated string with the substitution done. The caller
2730 * must deallocate it !
2731 */
2732xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002733xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2734 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002735#if 0
2736 xmlChar *buffer = NULL;
2737 unsigned int buffer_size = 0;
2738 unsigned int nbchars = 0;
2739
2740 xmlChar *current = NULL;
2741 xmlEntityPtr ent;
2742 unsigned int max = (unsigned int) len;
2743 int c,l;
2744#endif
2745
2746 static int deprecated = 0;
2747 if (!deprecated) {
2748 xmlGenericError(xmlGenericErrorContext,
2749 "xmlDecodeEntities() deprecated function reached\n");
2750 deprecated = 1;
2751 }
2752
2753#if 0
2754 if (ctxt->depth > 40) {
2755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2756 ctxt->sax->error(ctxt->userData,
2757 "Detected entity reference loop\n");
2758 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002759 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002760 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2761 return(NULL);
2762 }
2763
2764 /*
2765 * allocate a translation buffer.
2766 */
2767 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2768 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2769 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002770 xmlGenericError(xmlGenericErrorContext,
2771 "xmlDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774
2775 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002776 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002777 */
2778 GROW;
2779 c = CUR_CHAR(l);
2780 while ((nbchars < max) && (c != end) && /* NOTUSED */
2781 (c != end2) && (c != end3)) {
2782 GROW;
2783 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002784 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002785 int val = xmlParseCharRef(ctxt);
2786 COPY_BUF(0,buffer,nbchars,val);
2787 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002788 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002789 (what & XML_SUBSTITUTE_REF)) {
2790 if (xmlParserDebugEntities)
2791 xmlGenericError(xmlGenericErrorContext,
2792 "decoding Entity Reference\n");
2793 ent = xmlParseEntityRef(ctxt);
2794 if ((ent != NULL) &&
2795 (ctxt->replaceEntities != 0)) {
2796 current = ent->content;
2797 while (*current != 0) { /* non input consuming loop */
2798 buffer[nbchars++] = *current++;
2799 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2800 growBuffer(buffer);
2801 }
2802 }
2803 } else if (ent != NULL) {
2804 const xmlChar *cur = ent->name;
2805
2806 buffer[nbchars++] = '&';
2807 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2808 growBuffer(buffer);
2809 }
2810 while (*cur != 0) { /* non input consuming loop */
2811 buffer[nbchars++] = *cur++;
2812 }
2813 buffer[nbchars++] = ';';
2814 }
2815 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2816 /*
2817 * a PEReference induce to switch the entity flow,
2818 * we break here to flush the current set of chars
2819 * parsed if any. We will be called back later.
2820 */
2821 if (xmlParserDebugEntities)
2822 xmlGenericError(xmlGenericErrorContext,
2823 "decoding PE Reference\n");
2824 if (nbchars != 0) break;
2825
2826 xmlParsePEReference(ctxt);
2827
2828 /*
2829 * Pop-up of finished entities.
2830 */
2831 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2832 xmlPopInput(ctxt);
2833
2834 break;
2835 } else {
2836 COPY_BUF(l,buffer,nbchars,c);
2837 NEXTL(l);
2838 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2839 growBuffer(buffer);
2840 }
2841 }
2842 c = CUR_CHAR(l);
2843 }
2844 buffer[nbchars++] = 0;
2845 return(buffer);
2846#endif
2847 return(NULL);
2848}
2849
2850/**
2851 * xmlNamespaceParseNCName:
2852 * @ctxt: an XML parser context
2853 *
2854 * parse an XML namespace name.
2855 *
2856 * TODO: this seems not in use anymore, the namespace handling is done on
2857 * top of the SAX interfaces, i.e. not on raw input.
2858 *
2859 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2860 *
2861 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2862 * CombiningChar | Extender
2863 *
2864 * Returns the namespace name or NULL
2865 */
2866
2867xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002868xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002869#if 0
2870 xmlChar buf[XML_MAX_NAMELEN + 5];
2871 int len = 0, l;
2872 int cur = CUR_CHAR(l);
2873#endif
2874
2875 static int deprecated = 0;
2876 if (!deprecated) {
2877 xmlGenericError(xmlGenericErrorContext,
2878 "xmlNamespaceParseNCName() deprecated function reached\n");
2879 deprecated = 1;
2880 }
2881
2882#if 0
2883 /* load first the value of the char !!! */
2884 GROW;
2885 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2886
2887xmlGenericError(xmlGenericErrorContext,
2888 "xmlNamespaceParseNCName: reached loop 3\n");
2889 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2890 (cur == '.') || (cur == '-') ||
2891 (cur == '_') ||
2892 (IS_COMBINING(cur)) ||
2893 (IS_EXTENDER(cur))) {
2894 COPY_BUF(l,buf,len,cur);
2895 NEXTL(l);
2896 cur = CUR_CHAR(l);
2897 if (len >= XML_MAX_NAMELEN) {
2898 xmlGenericError(xmlGenericErrorContext,
2899 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2900 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2901 (cur == '.') || (cur == '-') ||
2902 (cur == '_') ||
2903 (IS_COMBINING(cur)) ||
2904 (IS_EXTENDER(cur))) {
2905 NEXTL(l);
2906 cur = CUR_CHAR(l);
2907 }
2908 break;
2909 }
2910 }
2911 return(xmlStrndup(buf, len));
2912#endif
2913 return(NULL);
2914}
2915
2916/**
2917 * xmlNamespaceParseQName:
2918 * @ctxt: an XML parser context
2919 * @prefix: a xmlChar **
2920 *
2921 * TODO: this seems not in use anymore, the namespace handling is done on
2922 * top of the SAX interfaces, i.e. not on raw input.
2923 *
2924 * parse an XML qualified name
2925 *
2926 * [NS 5] QName ::= (Prefix ':')? LocalPart
2927 *
2928 * [NS 6] Prefix ::= NCName
2929 *
2930 * [NS 7] LocalPart ::= NCName
2931 *
2932 * Returns the local part, and prefix is updated
2933 * to get the Prefix if any.
2934 */
2935
2936xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002937xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002938
2939 static int deprecated = 0;
2940 if (!deprecated) {
2941 xmlGenericError(xmlGenericErrorContext,
2942 "xmlNamespaceParseQName() deprecated function reached\n");
2943 deprecated = 1;
2944 }
2945
2946#if 0
2947 xmlChar *ret = NULL;
2948
2949 *prefix = NULL;
2950 ret = xmlNamespaceParseNCName(ctxt);
2951 if (RAW == ':') {
2952 *prefix = ret;
2953 NEXT;
2954 ret = xmlNamespaceParseNCName(ctxt);
2955 }
2956
2957 return(ret);
2958#endif
2959 return(NULL);
2960}
2961
2962/**
2963 * xmlNamespaceParseNSDef:
2964 * @ctxt: an XML parser context
2965 *
2966 * parse a namespace prefix declaration
2967 *
2968 * TODO: this seems not in use anymore, the namespace handling is done on
2969 * top of the SAX interfaces, i.e. not on raw input.
2970 *
2971 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2972 *
2973 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2974 *
2975 * Returns the namespace name
2976 */
2977
2978xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002979xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002980 static int deprecated = 0;
2981 if (!deprecated) {
2982 xmlGenericError(xmlGenericErrorContext,
2983 "xmlNamespaceParseNSDef() deprecated function reached\n");
2984 deprecated = 1;
2985 }
2986 return(NULL);
2987#if 0
2988 xmlChar *name = NULL;
2989
2990 if ((RAW == 'x') && (NXT(1) == 'm') &&
2991 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2992 (NXT(4) == 's')) {
2993 SKIP(5);
2994 if (RAW == ':') {
2995 NEXT;
2996 name = xmlNamespaceParseNCName(ctxt);
2997 }
2998 }
2999 return(name);
3000#endif
3001}
3002
3003/**
3004 * xmlParseQuotedString:
3005 * @ctxt: an XML parser context
3006 *
3007 * Parse and return a string between quotes or doublequotes
3008 *
3009 * TODO: Deprecated, to be removed at next drop of binary compatibility
3010 *
3011 * Returns the string parser or NULL.
3012 */
3013xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003014xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003015 static int deprecated = 0;
3016 if (!deprecated) {
3017 xmlGenericError(xmlGenericErrorContext,
3018 "xmlParseQuotedString() deprecated function reached\n");
3019 deprecated = 1;
3020 }
3021 return(NULL);
3022
3023#if 0
3024 xmlChar *buf = NULL;
3025 int len = 0,l;
3026 int size = XML_PARSER_BUFFER_SIZE;
3027 int c;
3028
3029 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3030 if (buf == NULL) {
3031 xmlGenericError(xmlGenericErrorContext,
3032 "malloc of %d byte failed\n", size);
3033 return(NULL);
3034 }
3035xmlGenericError(xmlGenericErrorContext,
3036 "xmlParseQuotedString: reached loop 4\n");
3037 if (RAW == '"') {
3038 NEXT;
3039 c = CUR_CHAR(l);
3040 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3041 if (len + 5 >= size) {
3042 size *= 2;
3043 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3044 if (buf == NULL) {
3045 xmlGenericError(xmlGenericErrorContext,
3046 "realloc of %d byte failed\n", size);
3047 return(NULL);
3048 }
3049 }
3050 COPY_BUF(l,buf,len,c);
3051 NEXTL(l);
3052 c = CUR_CHAR(l);
3053 }
3054 if (c != '"') {
3055 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3057 ctxt->sax->error(ctxt->userData,
3058 "String not closed \"%.50s\"\n", buf);
3059 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003060 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003061 } else {
3062 NEXT;
3063 }
3064 } else if (RAW == '\''){
3065 NEXT;
3066 c = CUR;
3067 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3068 if (len + 1 >= size) {
3069 size *= 2;
3070 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3071 if (buf == NULL) {
3072 xmlGenericError(xmlGenericErrorContext,
3073 "realloc of %d byte failed\n", size);
3074 return(NULL);
3075 }
3076 }
3077 buf[len++] = c;
3078 NEXT;
3079 c = CUR;
3080 }
3081 if (RAW != '\'') {
3082 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3084 ctxt->sax->error(ctxt->userData,
3085 "String not closed \"%.50s\"\n", buf);
3086 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003087 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003088 } else {
3089 NEXT;
3090 }
3091 }
3092 return(buf);
3093#endif
3094}
3095
3096/**
3097 * xmlParseNamespace:
3098 * @ctxt: an XML parser context
3099 *
3100 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3101 *
3102 * This is what the older xml-name Working Draft specified, a bunch of
3103 * other stuff may still rely on it, so support is still here as
3104 * if it was declared on the root of the Tree:-(
3105 *
3106 * TODO: remove from library
3107 *
3108 * To be removed at next drop of binary compatibility
3109 */
3110
3111void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003112xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003113 static int deprecated = 0;
3114 if (!deprecated) {
3115 xmlGenericError(xmlGenericErrorContext,
3116 "xmlParseNamespace() deprecated function reached\n");
3117 deprecated = 1;
3118 }
3119
3120#if 0
3121 xmlChar *href = NULL;
3122 xmlChar *prefix = NULL;
3123 int garbage = 0;
3124
3125 /*
3126 * We just skipped "namespace" or "xml:namespace"
3127 */
3128 SKIP_BLANKS;
3129
3130xmlGenericError(xmlGenericErrorContext,
3131 "xmlParseNamespace: reached loop 5\n");
3132 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3133 /*
3134 * We can have "ns" or "prefix" attributes
3135 * Old encoding as 'href' or 'AS' attributes is still supported
3136 */
3137 if ((RAW == 'n') && (NXT(1) == 's')) {
3138 garbage = 0;
3139 SKIP(2);
3140 SKIP_BLANKS;
3141
3142 if (RAW != '=') continue;
3143 NEXT;
3144 SKIP_BLANKS;
3145
3146 href = xmlParseQuotedString(ctxt);
3147 SKIP_BLANKS;
3148 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3149 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3150 garbage = 0;
3151 SKIP(4);
3152 SKIP_BLANKS;
3153
3154 if (RAW != '=') continue;
3155 NEXT;
3156 SKIP_BLANKS;
3157
3158 href = xmlParseQuotedString(ctxt);
3159 SKIP_BLANKS;
3160 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3161 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3162 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3163 garbage = 0;
3164 SKIP(6);
3165 SKIP_BLANKS;
3166
3167 if (RAW != '=') continue;
3168 NEXT;
3169 SKIP_BLANKS;
3170
3171 prefix = xmlParseQuotedString(ctxt);
3172 SKIP_BLANKS;
3173 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3174 garbage = 0;
3175 SKIP(2);
3176 SKIP_BLANKS;
3177
3178 if (RAW != '=') continue;
3179 NEXT;
3180 SKIP_BLANKS;
3181
3182 prefix = xmlParseQuotedString(ctxt);
3183 SKIP_BLANKS;
3184 } else if ((RAW == '?') && (NXT(1) == '>')) {
3185 garbage = 0;
3186 NEXT;
3187 } else {
3188 /*
3189 * Found garbage when parsing the namespace
3190 */
3191 if (!garbage) {
3192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3193 ctxt->sax->error(ctxt->userData,
3194 "xmlParseNamespace found garbage\n");
3195 }
3196 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3197 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003198 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003199 NEXT;
3200 }
3201 }
3202
3203 MOVETO_ENDTAG(CUR_PTR);
3204 NEXT;
3205
3206 /*
3207 * Register the DTD.
3208 if (href != NULL)
3209 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3210 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3211 */
3212
3213 if (prefix != NULL) xmlFree(prefix);
3214 if (href != NULL) xmlFree(href);
3215#endif
3216}
3217
3218/**
3219 * xmlScanName:
3220 * @ctxt: an XML parser context
3221 *
3222 * Trickery: parse an XML name but without consuming the input flow
3223 * Needed for rollback cases. Used only when parsing entities references.
3224 *
3225 * TODO: seems deprecated now, only used in the default part of
3226 * xmlParserHandleReference
3227 *
3228 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3229 * CombiningChar | Extender
3230 *
3231 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3232 *
3233 * [6] Names ::= Name (S Name)*
3234 *
3235 * Returns the Name parsed or NULL
3236 */
3237
3238xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003239xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003240 static int deprecated = 0;
3241 if (!deprecated) {
3242 xmlGenericError(xmlGenericErrorContext,
3243 "xmlScanName() deprecated function reached\n");
3244 deprecated = 1;
3245 }
3246 return(NULL);
3247
3248#if 0
3249 xmlChar buf[XML_MAX_NAMELEN];
3250 int len = 0;
3251
3252 GROW;
3253 if (!IS_LETTER(RAW) && (RAW != '_') &&
3254 (RAW != ':')) {
3255 return(NULL);
3256 }
3257
3258
3259 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3260 (NXT(len) == '.') || (NXT(len) == '-') ||
3261 (NXT(len) == '_') || (NXT(len) == ':') ||
3262 (IS_COMBINING(NXT(len))) ||
3263 (IS_EXTENDER(NXT(len)))) {
3264 GROW;
3265 buf[len] = NXT(len);
3266 len++;
3267 if (len >= XML_MAX_NAMELEN) {
3268 xmlGenericError(xmlGenericErrorContext,
3269 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3270 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3271 (IS_DIGIT(NXT(len))) ||
3272 (NXT(len) == '.') || (NXT(len) == '-') ||
3273 (NXT(len) == '_') || (NXT(len) == ':') ||
3274 (IS_COMBINING(NXT(len))) ||
3275 (IS_EXTENDER(NXT(len))))
3276 len++;
3277 break;
3278 }
3279 }
3280 return(xmlStrndup(buf, len));
3281#endif
3282}
3283
3284/**
3285 * xmlParserHandleReference:
3286 * @ctxt: the parser context
3287 *
3288 * TODO: Remove, now deprecated ... the test is done directly in the
3289 * content parsing
3290 * routines.
3291 *
3292 * [67] Reference ::= EntityRef | CharRef
3293 *
3294 * [68] EntityRef ::= '&' Name ';'
3295 *
3296 * [ WFC: Entity Declared ]
3297 * the Name given in the entity reference must match that in an entity
3298 * declaration, except that well-formed documents need not declare any
3299 * of the following entities: amp, lt, gt, apos, quot.
3300 *
3301 * [ WFC: Parsed Entity ]
3302 * An entity reference must not contain the name of an unparsed entity
3303 *
3304 * [66] CharRef ::= '&#' [0-9]+ ';' |
3305 * '&#x' [0-9a-fA-F]+ ';'
3306 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003307 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003308 * the handling is done accordingly to
3309 * http://www.w3.org/TR/REC-xml#entproc
3310 */
3311void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003312xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003313 static int deprecated = 0;
3314 if (!deprecated) {
3315 xmlGenericError(xmlGenericErrorContext,
3316 "xmlParserHandleReference() deprecated function reached\n");
3317 deprecated = 1;
3318 }
3319
Owen Taylor3473f882001-02-23 17:55:21 +00003320 return;
3321}
3322
3323/**
3324 * xmlHandleEntity:
3325 * @ctxt: an XML parser context
3326 * @entity: an XML entity pointer.
3327 *
3328 * Default handling of defined entities, when should we define a new input
3329 * stream ? When do we just handle that as a set of chars ?
3330 *
3331 * OBSOLETE: to be removed at some point.
3332 */
3333
3334void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003335xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003336 static int deprecated = 0;
3337 if (!deprecated) {
3338 xmlGenericError(xmlGenericErrorContext,
3339 "xmlHandleEntity() deprecated function reached\n");
3340 deprecated = 1;
3341 }
3342
3343#if 0
3344 int len;
3345 xmlParserInputPtr input;
3346
3347 if (entity->content == NULL) {
3348 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3351 entity->name);
3352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return;
3355 }
3356 len = xmlStrlen(entity->content);
3357 if (len <= 2) goto handle_as_char;
3358
3359 /*
3360 * Redefine its content as an input stream.
3361 */
3362 input = xmlNewEntityInputStream(ctxt, entity);
3363 xmlPushInput(ctxt, input);
3364 return;
3365
3366handle_as_char:
3367 /*
3368 * Just handle the content as a set of chars.
3369 */
3370 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3371 (ctxt->sax->characters != NULL))
3372 ctxt->sax->characters(ctxt->userData, entity->content, len);
3373#endif
3374}
3375
3376/**
3377 * xmlNewGlobalNs:
3378 * @doc: the document carrying the namespace
3379 * @href: the URI associated
3380 * @prefix: the prefix for the namespace
3381 *
3382 * Creation of a Namespace, the old way using PI and without scoping
3383 * DEPRECATED !!!
3384 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003385 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003386 */
3387xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003388xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3389 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003390 static int deprecated = 0;
3391 if (!deprecated) {
3392 xmlGenericError(xmlGenericErrorContext,
3393 "xmlNewGlobalNs() deprecated function reached\n");
3394 deprecated = 1;
3395 }
3396 return(NULL);
3397#if 0
3398 xmlNodePtr root;
3399
3400 xmlNsPtr cur;
3401
3402 root = xmlDocGetRootElement(doc);
3403 if (root != NULL)
3404 return(xmlNewNs(root, href, prefix));
3405
3406 /*
3407 * if there is no root element yet, create an old Namespace type
3408 * and it will be moved to the root at save time.
3409 */
3410 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3411 if (cur == NULL) {
3412 xmlGenericError(xmlGenericErrorContext,
3413 "xmlNewGlobalNs : malloc failed\n");
3414 return(NULL);
3415 }
3416 memset(cur, 0, sizeof(xmlNs));
3417 cur->type = XML_GLOBAL_NAMESPACE;
3418
3419 if (href != NULL)
3420 cur->href = xmlStrdup(href);
3421 if (prefix != NULL)
3422 cur->prefix = xmlStrdup(prefix);
3423
3424 /*
3425 * Add it at the end to preserve parsing order ...
3426 */
3427 if (doc != NULL) {
3428 if (doc->oldNs == NULL) {
3429 doc->oldNs = cur;
3430 } else {
3431 xmlNsPtr prev = doc->oldNs;
3432
3433 while (prev->next != NULL) prev = prev->next;
3434 prev->next = cur;
3435 }
3436 }
3437
3438 return(NULL);
3439#endif
3440}
3441
3442/**
3443 * xmlUpgradeOldNs:
3444 * @doc: a document pointer
3445 *
3446 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3447 * DEPRECATED
3448 */
3449void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003450xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003451 static int deprecated = 0;
3452 if (!deprecated) {
3453 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003454 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003455 deprecated = 1;
3456 }
3457#if 0
3458 xmlNsPtr cur;
3459
3460 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3461 if (doc->children == NULL) {
3462#ifdef DEBUG_TREE
3463 xmlGenericError(xmlGenericErrorContext,
3464 "xmlUpgradeOldNs: failed no root !\n");
3465#endif
3466 return;
3467 }
3468
3469 cur = doc->oldNs;
3470 while (cur->next != NULL) {
3471 cur->type = XML_LOCAL_NAMESPACE;
3472 cur = cur->next;
3473 }
3474 cur->type = XML_LOCAL_NAMESPACE;
3475 cur->next = doc->children->nsDef;
3476 doc->children->nsDef = doc->oldNs;
3477 doc->oldNs = NULL;
3478#endif
3479}
3480