blob: 8d4e680fc4d4aaa7cb201e57254923c8ee9b68bb [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
12
Daniel Veillard3c5ed912002-01-08 10:36:16 +000013#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000014#define XML_DIR_SEP '\\'
15#else
Owen Taylor3473f882001-02-23 17:55:21 +000016#define XML_DIR_SEP '/'
17#endif
18
Owen Taylor3473f882001-02-23 17:55:21 +000019#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000050#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000051#ifdef LIBXML_CATALOG_ENABLED
52#include <libxml/catalog.h>
53#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000054#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055
Daniel Veillard56a4cb82001-03-24 17:00:36 +000056void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000057
Daniel Veillarda53c6882001-07-25 17:18:57 +000058/*
59 * Various global defaults for parsing
60 */
Daniel Veillarda53c6882001-07-25 17:18:57 +000061#ifdef VMS
62int xmlSubstituteEntitiesDefaultVal = 0;
63#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
64int xmlDoValidityCheckingDefaultVal = 0;
65#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
Daniel Veillarda53c6882001-07-25 17:18:57 +000066#endif
Owen Taylor3473f882001-02-23 17:55:21 +000067
Daniel Veillard5e2dace2001-07-18 19:30:27 +000068/**
Owen Taylor3473f882001-02-23 17:55:21 +000069 * xmlCheckVersion:
70 * @version: the include version number
71 *
72 * check the compiled lib version against the include one.
73 * This can warn or immediately kill the application
74 */
75void
76xmlCheckVersion(int version) {
77 int myversion = (int) LIBXML_VERSION;
78
Daniel Veillard6f350292001-10-14 09:56:15 +000079 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000080
Owen Taylor3473f882001-02-23 17:55:21 +000081 if ((myversion / 10000) != (version / 10000)) {
82 xmlGenericError(xmlGenericErrorContext,
83 "Fatal: program compiled against libxml %d using libxml %d\n",
84 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000085 fprintf(stderr,
86 "Fatal: program compiled against libxml %d using libxml %d\n",
87 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000088 }
89 if ((myversion / 100) < (version / 100)) {
90 xmlGenericError(xmlGenericErrorContext,
91 "Warning: program compiled against libxml %d using older %d\n",
92 (version / 100), (myversion / 100));
93 }
94}
95
96
/*
 * Names of the parser features which can be queried; the table is
 * exported to callers through xmlGetFeaturesList().
 */
static const char *xmlFeaturesList[] = {
    /* parser options and state */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * If *@len is larger than the number of known features it is lowered
 * to that number; otherwise it is left untouched.
 *
 * Returns -1 in case of error (*@len negative or >= 1000), otherwise
 *         the total number of features; that total is also returned,
 *         without copying anything, when @len or @result is NULL.
 *         The strings copied belong to the library and must not be
 *         deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
    int count;
    int idx;

    /* Query mode: only the number of features is wanted. */
    if ((len == NULL) || (result == NULL))
        return(total);

    count = *len;
    if ((count < 0) || (count >= 1000))
        return(-1);

    if (count > total) {
        count = total;
        *len = total;
    }

    idx = 0;
    while (idx < count) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
168
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000169/**
Owen Taylor3473f882001-02-23 17:55:21 +0000170 * xmlGetFeature:
171 * @ctxt: an XML/HTML parser context
172 * @name: the feature name
173 * @result: location to store the result
174 *
175 * Read the current value of one feature of this parser instance
176 *
177 * Returns -1 in case or error, 0 otherwise
178 */
179int
180xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
181 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
182 return(-1);
183
184 if (!strcmp(name, "validate")) {
185 *((int *) result) = ctxt->validate;
186 } else if (!strcmp(name, "keep blanks")) {
187 *((int *) result) = ctxt->keepBlanks;
188 } else if (!strcmp(name, "disable SAX")) {
189 *((int *) result) = ctxt->disableSAX;
190 } else if (!strcmp(name, "fetch external entities")) {
191 *((int *) result) = ctxt->loadsubset;
192 } else if (!strcmp(name, "substitute entities")) {
193 *((int *) result) = ctxt->replaceEntities;
194 } else if (!strcmp(name, "gather line info")) {
195 *((int *) result) = ctxt->record_info;
196 } else if (!strcmp(name, "user data")) {
197 *((void **)result) = ctxt->userData;
198 } else if (!strcmp(name, "is html")) {
199 *((int *) result) = ctxt->html;
200 } else if (!strcmp(name, "is standalone")) {
201 *((int *) result) = ctxt->standalone;
202 } else if (!strcmp(name, "document")) {
203 *((xmlDocPtr *) result) = ctxt->myDoc;
204 } else if (!strcmp(name, "is well formed")) {
205 *((int *) result) = ctxt->wellFormed;
206 } else if (!strcmp(name, "is valid")) {
207 *((int *) result) = ctxt->valid;
208 } else if (!strcmp(name, "SAX block")) {
209 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
210 } else if (!strcmp(name, "SAX function internalSubset")) {
211 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
212 } else if (!strcmp(name, "SAX function isStandalone")) {
213 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
214 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
215 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
216 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
217 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
218 } else if (!strcmp(name, "SAX function resolveEntity")) {
219 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
220 } else if (!strcmp(name, "SAX function getEntity")) {
221 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
222 } else if (!strcmp(name, "SAX function entityDecl")) {
223 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
224 } else if (!strcmp(name, "SAX function notationDecl")) {
225 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
226 } else if (!strcmp(name, "SAX function attributeDecl")) {
227 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
228 } else if (!strcmp(name, "SAX function elementDecl")) {
229 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
230 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
231 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
232 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
233 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
234 } else if (!strcmp(name, "SAX function startDocument")) {
235 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
236 } else if (!strcmp(name, "SAX function endDocument")) {
237 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
238 } else if (!strcmp(name, "SAX function startElement")) {
239 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
240 } else if (!strcmp(name, "SAX function endElement")) {
241 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
242 } else if (!strcmp(name, "SAX function reference")) {
243 *((referenceSAXFunc *) result) = ctxt->sax->reference;
244 } else if (!strcmp(name, "SAX function characters")) {
245 *((charactersSAXFunc *) result) = ctxt->sax->characters;
246 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
247 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
248 } else if (!strcmp(name, "SAX function processingInstruction")) {
249 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
250 } else if (!strcmp(name, "SAX function comment")) {
251 *((commentSAXFunc *) result) = ctxt->sax->comment;
252 } else if (!strcmp(name, "SAX function warning")) {
253 *((warningSAXFunc *) result) = ctxt->sax->warning;
254 } else if (!strcmp(name, "SAX function error")) {
255 *((errorSAXFunc *) result) = ctxt->sax->error;
256 } else if (!strcmp(name, "SAX function fatalError")) {
257 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
258 } else if (!strcmp(name, "SAX function getParameterEntity")) {
259 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
260 } else if (!strcmp(name, "SAX function cdataBlock")) {
261 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
262 } else if (!strcmp(name, "SAX function externalSubset")) {
263 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
264 } else {
265 return(-1);
266 }
267 return(0);
268}
269
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000270/**
Owen Taylor3473f882001-02-23 17:55:21 +0000271 * xmlSetFeature:
272 * @ctxt: an XML/HTML parser context
273 * @name: the feature name
274 * @value: pointer to the location of the new value
275 *
276 * Change the current value of one feature of this parser instance
277 *
278 * Returns -1 in case or error, 0 otherwise
279 */
280int
281xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
282 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
283 return(-1);
284
285 if (!strcmp(name, "validate")) {
286 int newvalidate = *((int *) value);
287 if ((!ctxt->validate) && (newvalidate != 0)) {
288 if (ctxt->vctxt.warning == NULL)
289 ctxt->vctxt.warning = xmlParserValidityWarning;
290 if (ctxt->vctxt.error == NULL)
291 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000292 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000293 }
294 ctxt->validate = newvalidate;
295 } else if (!strcmp(name, "keep blanks")) {
296 ctxt->keepBlanks = *((int *) value);
297 } else if (!strcmp(name, "disable SAX")) {
298 ctxt->disableSAX = *((int *) value);
299 } else if (!strcmp(name, "fetch external entities")) {
300 ctxt->loadsubset = *((int *) value);
301 } else if (!strcmp(name, "substitute entities")) {
302 ctxt->replaceEntities = *((int *) value);
303 } else if (!strcmp(name, "gather line info")) {
304 ctxt->record_info = *((int *) value);
305 } else if (!strcmp(name, "user data")) {
306 ctxt->userData = *((void **)value);
307 } else if (!strcmp(name, "is html")) {
308 ctxt->html = *((int *) value);
309 } else if (!strcmp(name, "is standalone")) {
310 ctxt->standalone = *((int *) value);
311 } else if (!strcmp(name, "document")) {
312 ctxt->myDoc = *((xmlDocPtr *) value);
313 } else if (!strcmp(name, "is well formed")) {
314 ctxt->wellFormed = *((int *) value);
315 } else if (!strcmp(name, "is valid")) {
316 ctxt->valid = *((int *) value);
317 } else if (!strcmp(name, "SAX block")) {
318 ctxt->sax = *((xmlSAXHandlerPtr *) value);
319 } else if (!strcmp(name, "SAX function internalSubset")) {
320 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function isStandalone")) {
322 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
324 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
325 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
326 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function resolveEntity")) {
328 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
329 } else if (!strcmp(name, "SAX function getEntity")) {
330 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
331 } else if (!strcmp(name, "SAX function entityDecl")) {
332 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function notationDecl")) {
334 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function attributeDecl")) {
336 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function elementDecl")) {
338 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
340 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
342 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function startDocument")) {
344 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function endDocument")) {
346 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function startElement")) {
348 ctxt->sax->startElement = *((startElementSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function endElement")) {
350 ctxt->sax->endElement = *((endElementSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function reference")) {
352 ctxt->sax->reference = *((referenceSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function characters")) {
354 ctxt->sax->characters = *((charactersSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
356 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function processingInstruction")) {
358 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function comment")) {
360 ctxt->sax->comment = *((commentSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function warning")) {
362 ctxt->sax->warning = *((warningSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function error")) {
364 ctxt->sax->error = *((errorSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function fatalError")) {
366 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function getParameterEntity")) {
368 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
369 } else if (!strcmp(name, "SAX function cdataBlock")) {
370 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
371 } else if (!strcmp(name, "SAX function externalSubset")) {
372 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
373 } else {
374 return(-1);
375 }
376 return(0);
377}
378
379/************************************************************************
380 * *
381 * Some functions to avoid too large macros *
382 * *
383 ************************************************************************/
384
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space only TAB, LF and CR are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* 0xD800 - 0xDFFF: the surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through and fail the range test below. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
405
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of the white space characters
 * of the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
420
/*
 * Lookup table for the BaseChar production restricted to the code
 * points 0x00 - 0xFF: an entry is 1 when the code point is a BaseChar.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; larger ones
 * are tested against the explicit ranges, split in three successive
 * bands so that only the ranges of the relevant band get evaluated.
 * Negative values are rejected up front since they are not valid
 * indexes for the lookup table.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* Not a code point: must never be used to index xmlBaseArray. */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* Band 1: 0x0100 - 0x0904 */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* Band 2: 0x0905 - 0x109F */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* Band 3: 0x10A0 and above */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
659
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Inclusive [first, last] ranges located above the ASCII digits. */
    static const int digitRanges[][2] = {
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 },
    };
    const unsigned int nbRanges =
        sizeof(digitRanges) / sizeof(digitRanges[0]);
    unsigned int idx;

    /* The ASCII digits are by far the most common case. */
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);

    /* Nothing else can match below the first extended range. */
    if (c < 0x0660)
        return(0);

    for (idx = 0; idx < nbRanges; idx++) {
        if ((c >= digitRanges[idx][0]) && (c <= digitRanges[idx][1]))
            return(1);
    }
    return(0);
}
689
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /*
     * Inclusive [first, last] ranges, kept in increasing order so the
     * scan can stop as soon as it gets past @c.
     */
    static const int combiningRanges[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },

        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },

        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },

        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A },
    };
    const unsigned int nbRanges =
        sizeof(combiningRanges) / sizeof(combiningRanges[0]);
    unsigned int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* Ranges are sorted: once past @c nothing further can match. */
        if (c < combiningRanges[idx][0])
            return(0);
        if (c <= combiningRanges[idx][1])
            return(1);
    }
    return(0);
}
802
/**
 * xmlIsExtender:
 * @c:  a Unicode code point (int)
 *
 * Test the code point against the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 1 if @c is an Extender, 0 otherwise
 */
int
xmlIsExtender(int c) {
    /* isolated code points of the production */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) ||
        (c == 0x0387) || (c == 0x0640) || (c == 0x0E46) ||
        (c == 0x0EC6) || (c == 0x3005))
        return(1);

    /* the three contiguous ranges of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return(1);
    if ((c >= 0x309D) && (c <= 0x309E))
        return(1);
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return(1);

    return(0);
}
827
/**
 * xmlIsIdeographic:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed in the production are accepted; in
 * particular the range [#xF900-#xFA2D] is not part of production [86]
 * and is therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* every Ideographic is >= 0x3007, so small values fail fast */
    if (c < 0x0100)
        return(0);
    return((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
           (((c) >= 0x3021) && ((c) <= 0x3029)) ||
           ((c) == 0x3007));
}
845
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Test the code point against the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a Letter, 0 otherwise
 */
int
xmlIsLetter(int c) {
    /* a Letter is either a BaseChar ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an Ideographic */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
859
/**
 * xmlIsPubidChar:
 * @c:  a Unicode code point (int)
 *
 * Test the code point against the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 1 if @c is a PubidChar, 0 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* the three allowed white space characters */
        case 0x20: case 0x0D: case 0x0A:
        /* the allowed punctuation set */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
882
883/************************************************************************
884 * *
885 * Input handling functions for progressive parsing *
886 * *
887 ************************************************************************/
888
889/* #define DEBUG_INPUT */
890/* #define DEBUG_STACK */
891/* #define DEBUG_PUSH */
892
893
894/* we need to keep enough input to show errors in context */
895#define LINE_LEN 80
896
897#ifdef DEBUG_INPUT
898#define CHECK_BUFFER(in) check_buffer(in)
899
900void check_buffer(xmlParserInputPtr in) {
901 if (in->base != in->buf->buffer->content) {
902 xmlGenericError(xmlGenericErrorContext,
903 "xmlParserInput: base mismatch problem\n");
904 }
905 if (in->cur < in->base) {
906 xmlGenericError(xmlGenericErrorContext,
907 "xmlParserInput: cur < base problem\n");
908 }
909 if (in->cur > in->base + in->buf->buffer->use) {
910 xmlGenericError(xmlGenericErrorContext,
911 "xmlParserInput: cur > base + use problem\n");
912 }
913 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
914 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
915 in->buf->buffer->use, in->buf->buffer->size);
916}
917
918#else
919#define CHECK_BUFFER(in)
920#endif
921
922
923/**
924 * xmlParserInputRead:
925 * @in: an XML parser input
926 * @len: an indicative size for the lookahead
927 *
928 * This function refresh the input for the parser. It doesn't try to
929 * preserve pointers to the input buffer, and discard already read data
930 *
931 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
932 * end of this entity
933 */
934int
935xmlParserInputRead(xmlParserInputPtr in, int len) {
936 int ret;
937 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000938 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000939
940#ifdef DEBUG_INPUT
941 xmlGenericError(xmlGenericErrorContext, "Read\n");
942#endif
943 if (in->buf == NULL) return(-1);
944 if (in->base == NULL) return(-1);
945 if (in->cur == NULL) return(-1);
946 if (in->buf->buffer == NULL) return(-1);
947 if (in->buf->readcallback == NULL) return(-1);
948
949 CHECK_BUFFER(in);
950
951 used = in->cur - in->buf->buffer->content;
952 ret = xmlBufferShrink(in->buf->buffer, used);
953 if (ret > 0) {
954 in->cur -= ret;
955 in->consumed += ret;
956 }
957 ret = xmlParserInputBufferRead(in->buf, len);
958 if (in->base != in->buf->buffer->content) {
959 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000960 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000961 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000962 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000963 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000964 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000965 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000966 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000967
968 CHECK_BUFFER(in);
969
970 return(ret);
971}
972
973/**
974 * xmlParserInputGrow:
975 * @in: an XML parser input
976 * @len: an indicative size for the lookahead
977 *
978 * This function increase the input for the parser. It tries to
979 * preserve pointers to the input buffer, and keep already read data
980 *
981 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
982 * end of this entity
983 */
984int
985xmlParserInputGrow(xmlParserInputPtr in, int len) {
986 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000987 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000988
989#ifdef DEBUG_INPUT
990 xmlGenericError(xmlGenericErrorContext, "Grow\n");
991#endif
992 if (in->buf == NULL) return(-1);
993 if (in->base == NULL) return(-1);
994 if (in->cur == NULL) return(-1);
995 if (in->buf->buffer == NULL) return(-1);
996
997 CHECK_BUFFER(in);
998
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000999 indx = in->cur - in->base;
1000 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001001
1002 CHECK_BUFFER(in);
1003
1004 return(0);
1005 }
1006 if (in->buf->readcallback != NULL)
1007 ret = xmlParserInputBufferGrow(in->buf, len);
1008 else
1009 return(0);
1010
1011 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001012 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001013 * block, but we use it really as an integer to do some
1014 * pointer arithmetic. Insure will raise it as a bug but in
1015 * that specific case, that's not !
1016 */
1017 if (in->base != in->buf->buffer->content) {
1018 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001019 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001020 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001021 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001022 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001023 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001024 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001025 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001026
1027 CHECK_BUFFER(in);
1028
1029 return(ret);
1030}
1031
1032/**
1033 * xmlParserInputShrink:
1034 * @in: an XML parser input
1035 *
1036 * This function removes used input for the parser.
1037 */
1038void
1039xmlParserInputShrink(xmlParserInputPtr in) {
1040 int used;
1041 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001042 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001043
1044#ifdef DEBUG_INPUT
1045 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1046#endif
1047 if (in->buf == NULL) return;
1048 if (in->base == NULL) return;
1049 if (in->cur == NULL) return;
1050 if (in->buf->buffer == NULL) return;
1051
1052 CHECK_BUFFER(in);
1053
1054 used = in->cur - in->buf->buffer->content;
1055 /*
1056 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001057 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001058 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001059 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001060 return;
1061 if (used > INPUT_CHUNK) {
1062 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1063 if (ret > 0) {
1064 in->cur -= ret;
1065 in->consumed += ret;
1066 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001067 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001068 }
1069
1070 CHECK_BUFFER(in);
1071
1072 if (in->buf->buffer->use > INPUT_CHUNK) {
1073 return;
1074 }
1075 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1076 if (in->base != in->buf->buffer->content) {
1077 /*
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001078 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001079 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001080 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001081 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001082 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001083 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001084 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001085
1086 CHECK_BUFFER(in);
1087}
1088
1089/************************************************************************
1090 * *
1091 * UTF8 character input and related functions *
1092 * *
1093 ************************************************************************/
1094
1095/**
1096 * xmlNextChar:
1097 * @ctxt: the XML parser context
1098 *
1099 * Skip to the next char input char.
1100 */
1101
1102void
1103xmlNextChar(xmlParserCtxtPtr ctxt) {
1104 if (ctxt->instate == XML_PARSER_EOF)
1105 return;
1106
1107 /*
1108 * 2.11 End-of-Line Handling
1109 * the literal two-character sequence "#xD#xA" or a standalone
1110 * literal #xD, an XML processor must pass to the application
1111 * the single character #xA.
1112 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001113 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001114 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001115 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1116 (ctxt->instate != XML_PARSER_COMMENT)) {
1117 /*
1118 * If we are at the end of the current entity and
1119 * the context allows it, we pop consumed entities
1120 * automatically.
1121 * the auto closing should be blocked in other cases
1122 */
1123 xmlPopInput(ctxt);
1124 } else {
1125 if (*(ctxt->input->cur) == '\n') {
1126 ctxt->input->line++; ctxt->input->col = 1;
1127 } else ctxt->input->col++;
1128 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 const unsigned char *cur = ctxt->input->cur;
1141 unsigned char c;
1142
1143 c = *cur;
1144 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
Daniel Veillard561b7f82002-03-20 21:55:57 +00001152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n", val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 ctxt->disableSAX = 1;
1185 }
1186 } else
1187 /* 2-byte code */
1188 ctxt->input->cur += 2;
1189 } else
1190 /* 1-byte code */
1191 ctxt->input->cur++;
1192 } else {
1193 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001194 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001195 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001196 * XML constructs only use < 128 chars
1197 */
1198 ctxt->input->cur++;
1199 }
1200 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00001203 }
1204 } else {
1205 ctxt->input->cur++;
1206 ctxt->nbChars++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001207 if (*ctxt->input->cur == 0)
Owen Taylor3473f882001-02-23 17:55:21 +00001208 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1209 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001210 if ((*ctxt->input->cur == '%') && (!ctxt->html))
Owen Taylor3473f882001-02-23 17:55:21 +00001211 xmlParserHandlePEReference(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001212 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
1215 return;
1216encoding_error:
1217 /*
1218 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001219 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001220 * declaration header. Report the error and switch the encoding
1221 * to ISO-Latin-1 (if you don't like this policy, just declare the
1222 * encoding !)
1223 */
1224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1225 ctxt->sax->error(ctxt->userData,
1226 "Input is not proper UTF-8, indicate encoding !\n");
1227 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001228 ctxt->input->cur[0], ctxt->input->cur[1],
1229 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001230 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001231 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001232 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1233
1234 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00001235 ctxt->input->cur++;
Owen Taylor3473f882001-02-23 17:55:21 +00001236 return;
1237}
1238
1239/**
1240 * xmlCurrentChar:
1241 * @ctxt: the XML parser context
1242 * @len: pointer to the length of the char read
1243 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001244 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001245 * bytes in the input buffer. Implement the end of line normalization:
1246 * 2.11 End-of-Line Handling
1247 * Wherever an external parsed entity or the literal entity value
1248 * of an internal parsed entity contains either the literal two-character
1249 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1250 * must pass to the application the single character #xA.
1251 * This behavior can conveniently be produced by normalizing all
1252 * line breaks to #xA on input, before parsing.)
1253 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001254 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001255 */
1256
1257int
1258xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1259 if (ctxt->instate == XML_PARSER_EOF)
1260 return(0);
1261
Daniel Veillard561b7f82002-03-20 21:55:57 +00001262 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1263 *len = 1;
1264 return((int) *ctxt->input->cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001265 }
1266 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1267 /*
1268 * We are supposed to handle UTF8, check it's valid
1269 * From rfc2044: encoding of the Unicode values on UTF-8:
1270 *
1271 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1272 * 0000 0000-0000 007F 0xxxxxxx
1273 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1274 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1275 *
1276 * Check for the 0x110000 limit too
1277 */
1278 const unsigned char *cur = ctxt->input->cur;
1279 unsigned char c;
1280 unsigned int val;
1281
1282 c = *cur;
1283 if (c & 0x80) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001284 if (cur[1] == 0)
1285 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1286 if ((cur[1] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001287 goto encoding_error;
1288 if ((c & 0xe0) == 0xe0) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001289
1290 if (cur[2] == 0)
1291 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1292 if ((cur[2] & 0xc0) != 0x80)
Owen Taylor3473f882001-02-23 17:55:21 +00001293 goto encoding_error;
1294 if ((c & 0xf0) == 0xf0) {
1295 if (cur[3] == 0)
1296 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard561b7f82002-03-20 21:55:57 +00001297 if (((c & 0xf8) != 0xf0) ||
Owen Taylor3473f882001-02-23 17:55:21 +00001298 ((cur[3] & 0xc0) != 0x80))
1299 goto encoding_error;
1300 /* 4-byte code */
1301 *len = 4;
1302 val = (cur[0] & 0x7) << 18;
1303 val |= (cur[1] & 0x3f) << 12;
1304 val |= (cur[2] & 0x3f) << 6;
1305 val |= cur[3] & 0x3f;
1306 } else {
1307 /* 3-byte code */
1308 *len = 3;
1309 val = (cur[0] & 0xf) << 12;
1310 val |= (cur[1] & 0x3f) << 6;
1311 val |= cur[2] & 0x3f;
1312 }
1313 } else {
1314 /* 2-byte code */
1315 *len = 2;
1316 val = (cur[0] & 0x1f) << 6;
1317 val |= cur[1] & 0x3f;
1318 }
1319 if (!IS_CHAR(val)) {
1320 if ((ctxt->sax != NULL) &&
1321 (ctxt->sax->error != NULL))
1322 ctxt->sax->error(ctxt->userData,
1323 "Char 0x%X out of allowed range\n", val);
1324 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1325 ctxt->wellFormed = 0;
1326 ctxt->disableSAX = 1;
1327 }
1328 return(val);
1329 } else {
1330 /* 1-byte code */
1331 *len = 1;
1332 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001333 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001334 ctxt->nbChars++;
1335 ctxt->input->cur++;
1336 }
1337 return(0xA);
1338 }
1339 return((int) *ctxt->input->cur);
1340 }
1341 }
1342 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001343 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001344 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001345 * XML constructs only use < 128 chars
1346 */
1347 *len = 1;
1348 if (*ctxt->input->cur == 0xD) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00001349 if (ctxt->input->cur[1] == 0xA) {
Owen Taylor3473f882001-02-23 17:55:21 +00001350 ctxt->nbChars++;
1351 ctxt->input->cur++;
1352 }
1353 return(0xA);
1354 }
1355 return((int) *ctxt->input->cur);
1356encoding_error:
1357 /*
1358 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001359 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001360 * declaration header. Report the error and switch the encoding
1361 * to ISO-Latin-1 (if you don't like this policy, just declare the
1362 * encoding !)
1363 */
1364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1365 ctxt->sax->error(ctxt->userData,
1366 "Input is not proper UTF-8, indicate encoding !\n");
1367 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
Daniel Veillard561b7f82002-03-20 21:55:57 +00001368 ctxt->input->cur[0], ctxt->input->cur[1],
1369 ctxt->input->cur[2], ctxt->input->cur[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001370 }
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001371 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001372 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1373
1374 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1375 *len = 1;
1376 return((int) *ctxt->input->cur);
1377}
1378
1379/**
1380 * xmlStringCurrentChar:
1381 * @ctxt: the XML parser context
1382 * @cur: pointer to the beginning of the char
1383 * @len: pointer to the length of the char read
1384 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001385 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001386 * bytes in the input buffer.
1387 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001388 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001389 */
1390
1391int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001392xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1393{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001394 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001395 /*
1396 * We are supposed to handle UTF8, check it's valid
1397 * From rfc2044: encoding of the Unicode values on UTF-8:
1398 *
1399 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1400 * 0000 0000-0000 007F 0xxxxxxx
1401 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1402 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1403 *
1404 * Check for the 0x110000 limit too
1405 */
1406 unsigned char c;
1407 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001408
Daniel Veillardd8224e02002-01-13 15:43:22 +00001409 c = *cur;
1410 if (c & 0x80) {
1411 if ((cur[1] & 0xc0) != 0x80)
1412 goto encoding_error;
1413 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001414
Daniel Veillardd8224e02002-01-13 15:43:22 +00001415 if ((cur[2] & 0xc0) != 0x80)
1416 goto encoding_error;
1417 if ((c & 0xf0) == 0xf0) {
1418 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1419 goto encoding_error;
1420 /* 4-byte code */
1421 *len = 4;
1422 val = (cur[0] & 0x7) << 18;
1423 val |= (cur[1] & 0x3f) << 12;
1424 val |= (cur[2] & 0x3f) << 6;
1425 val |= cur[3] & 0x3f;
1426 } else {
1427 /* 3-byte code */
1428 *len = 3;
1429 val = (cur[0] & 0xf) << 12;
1430 val |= (cur[1] & 0x3f) << 6;
1431 val |= cur[2] & 0x3f;
1432 }
1433 } else {
1434 /* 2-byte code */
1435 *len = 2;
1436 val = (cur[0] & 0x1f) << 6;
1437 val |= cur[1] & 0x3f;
1438 }
1439 if (!IS_CHAR(val)) {
1440 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1441 (ctxt->sax->error != NULL))
1442 ctxt->sax->error(ctxt->userData,
1443 "Char 0x%X out of allowed range\n",
1444 val);
1445 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1446 ctxt->wellFormed = 0;
1447 ctxt->disableSAX = 1;
1448 }
1449 return (val);
1450 } else {
1451 /* 1-byte code */
1452 *len = 1;
1453 return ((int) *cur);
1454 }
Owen Taylor3473f882001-02-23 17:55:21 +00001455 }
1456 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001457 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001458 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001459 * XML constructs only use < 128 chars
1460 */
1461 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001462 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001463encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001464
Owen Taylor3473f882001-02-23 17:55:21 +00001465 /*
1466 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001467 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001468 * declaration header. Report the error and switch the encoding
1469 * to ISO-Latin-1 (if you don't like this policy, just declare the
1470 * encoding !)
1471 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001472 if (ctxt != NULL) {
1473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1474 ctxt->sax->error(ctxt->userData,
1475 "Input is not proper UTF-8, indicate encoding !\n");
1476 ctxt->sax->error(ctxt->userData,
1477 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1478 ctxt->input->cur[0], ctxt->input->cur[1],
1479 ctxt->input->cur[2], ctxt->input->cur[3]);
1480 }
1481 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Daniel Veillard8ab0f582002-02-18 18:31:38 +00001482 ctxt->wellFormed = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001483 }
Owen Taylor3473f882001-02-23 17:55:21 +00001484
1485 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001486 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001487}
1488
1489/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001490 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001491 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001492 * @val: the char value
1493 *
1494 * append the char value in the array
1495 *
1496 * Returns the number of xmlChar written
1497 */
Owen Taylor3473f882001-02-23 17:55:21 +00001498int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001499xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001500 /*
1501 * We are supposed to handle UTF8, check it's valid
1502 * From rfc2044: encoding of the Unicode values on UTF-8:
1503 *
1504 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1505 * 0000 0000-0000 007F 0xxxxxxx
1506 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1507 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1508 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001509 if (val >= 0x80) {
1510 xmlChar *savedout = out;
1511 int bits;
1512 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1513 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1514 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1515 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001516 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001517 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001518 val);
1519 return(0);
1520 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001521 for ( ; bits >= 0; bits-= 6)
1522 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1523 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001524 }
1525 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001526 return 1;
1527}
1528
1529/**
1530 * xmlCopyChar:
1531 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001532 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001533 * @val: the char value
1534 *
1535 * append the char value in the array
1536 *
1537 * Returns the number of xmlChar written
1538 */
1539
1540int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001541xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001542 /* the len parameter is ignored */
1543 if (val >= 0x80) {
1544 return(xmlCopyCharMultiByte (out, val));
1545 }
1546 *out = (xmlChar) val;
1547 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001548}
1549
1550/************************************************************************
1551 * *
1552 * Commodity functions to switch encodings *
1553 * *
1554 ************************************************************************/
1555
1556/**
1557 * xmlSwitchEncoding:
1558 * @ctxt: the parser context
1559 * @enc: the encoding value (number)
1560 *
1561 * change the input functions when discovering the character encoding
1562 * of a given entity.
1563 *
1564 * Returns 0 in case of success, -1 otherwise
1565 */
1566int
1567xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1568{
1569 xmlCharEncodingHandlerPtr handler;
1570
1571 switch (enc) {
1572 case XML_CHAR_ENCODING_ERROR:
1573 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1576 ctxt->wellFormed = 0;
1577 ctxt->disableSAX = 1;
1578 break;
1579 case XML_CHAR_ENCODING_NONE:
1580 /* let's assume it's UTF-8 without the XML decl */
1581 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1582 return(0);
1583 case XML_CHAR_ENCODING_UTF8:
1584 /* default encoding, no conversion should be needed */
1585 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001586
1587 /*
1588 * Errata on XML-1.0 June 20 2001
1589 * Specific handling of the Byte Order Mark for
1590 * UTF-8
1591 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001592 if ((ctxt->input != NULL) &&
1593 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001594 (ctxt->input->cur[1] == 0xBB) &&
1595 (ctxt->input->cur[2] == 0xBF)) {
1596 ctxt->input->cur += 3;
1597 }
Owen Taylor3473f882001-02-23 17:55:21 +00001598 return(0);
1599 default:
1600 break;
1601 }
1602 handler = xmlGetCharEncodingHandler(enc);
1603 if (handler == NULL) {
1604 /*
1605 * Default handlers.
1606 */
1607 switch (enc) {
1608 case XML_CHAR_ENCODING_ERROR:
1609 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1611 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1612 ctxt->wellFormed = 0;
1613 ctxt->disableSAX = 1;
1614 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1615 break;
1616 case XML_CHAR_ENCODING_NONE:
1617 /* let's assume it's UTF-8 without the XML decl */
1618 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1619 return(0);
1620 case XML_CHAR_ENCODING_UTF8:
1621 case XML_CHAR_ENCODING_ASCII:
1622 /* default encoding, no conversion should be needed */
1623 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1624 return(0);
1625 case XML_CHAR_ENCODING_UTF16LE:
1626 break;
1627 case XML_CHAR_ENCODING_UTF16BE:
1628 break;
1629 case XML_CHAR_ENCODING_UCS4LE:
1630 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1632 ctxt->sax->error(ctxt->userData,
1633 "char encoding USC4 little endian not supported\n");
1634 break;
1635 case XML_CHAR_ENCODING_UCS4BE:
1636 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638 ctxt->sax->error(ctxt->userData,
1639 "char encoding USC4 big endian not supported\n");
1640 break;
1641 case XML_CHAR_ENCODING_EBCDIC:
1642 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "char encoding EBCDIC not supported\n");
1646 break;
1647 case XML_CHAR_ENCODING_UCS4_2143:
1648 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1650 ctxt->sax->error(ctxt->userData,
1651 "char encoding UCS4 2143 not supported\n");
1652 break;
1653 case XML_CHAR_ENCODING_UCS4_3412:
1654 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "char encoding UCS4 3412 not supported\n");
1658 break;
1659 case XML_CHAR_ENCODING_UCS2:
1660 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1662 ctxt->sax->error(ctxt->userData,
1663 "char encoding UCS2 not supported\n");
1664 break;
1665 case XML_CHAR_ENCODING_8859_1:
1666 case XML_CHAR_ENCODING_8859_2:
1667 case XML_CHAR_ENCODING_8859_3:
1668 case XML_CHAR_ENCODING_8859_4:
1669 case XML_CHAR_ENCODING_8859_5:
1670 case XML_CHAR_ENCODING_8859_6:
1671 case XML_CHAR_ENCODING_8859_7:
1672 case XML_CHAR_ENCODING_8859_8:
1673 case XML_CHAR_ENCODING_8859_9:
1674 /*
1675 * We used to keep the internal content in the
1676 * document encoding however this turns being unmaintainable
1677 * So xmlGetCharEncodingHandler() will return non-null
1678 * values for this now.
1679 */
1680 if ((ctxt->inputNr == 1) &&
1681 (ctxt->encoding == NULL) &&
1682 (ctxt->input->encoding != NULL)) {
1683 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1684 }
1685 ctxt->charset = enc;
1686 return(0);
1687 case XML_CHAR_ENCODING_2022_JP:
1688 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
1691 "char encoding ISO-2022-JPnot supported\n");
1692 break;
1693 case XML_CHAR_ENCODING_SHIFT_JIS:
1694 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696 ctxt->sax->error(ctxt->userData,
1697 "char encoding Shift_JIS not supported\n");
1698 break;
1699 case XML_CHAR_ENCODING_EUC_JP:
1700 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702 ctxt->sax->error(ctxt->userData,
1703 "char encoding EUC-JPnot supported\n");
1704 break;
1705 }
1706 }
1707 if (handler == NULL)
1708 return(-1);
1709 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1710 return(xmlSwitchToEncoding(ctxt, handler));
1711}
1712
1713/**
1714 * xmlSwitchToEncoding:
1715 * @ctxt: the parser context
1716 * @handler: the encoding handler
1717 *
1718 * change the input functions when discovering the character encoding
1719 * of a given entity.
1720 *
1721 * Returns 0 in case of success, -1 otherwise
1722 */
1723int
1724xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1725{
1726 int nbchars;
1727
1728 if (handler != NULL) {
1729 if (ctxt->input != NULL) {
1730 if (ctxt->input->buf != NULL) {
1731 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard878eab02002-02-19 13:46:09 +00001732 /*
1733 * Check in case the auto encoding detetection triggered
1734 * in already.
1735 */
Owen Taylor3473f882001-02-23 17:55:21 +00001736 if (ctxt->input->buf->encoder == handler)
1737 return(0);
Daniel Veillard878eab02002-02-19 13:46:09 +00001738
1739 /*
1740 * "UTF-16" can be used for both LE and BE
1741 */
1742 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1743 BAD_CAST "UTF-16", 6)) &&
1744 (!xmlStrncmp(BAD_CAST handler->name,
1745 BAD_CAST "UTF-16", 6))) {
1746 return(0);
1747 }
1748
Owen Taylor3473f882001-02-23 17:55:21 +00001749 /*
1750 * Note: this is a bit dangerous, but that's what it
1751 * takes to use nearly compatible signature for different
1752 * encodings.
1753 */
1754 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1755 ctxt->input->buf->encoder = handler;
1756 return(0);
1757 }
1758 ctxt->input->buf->encoder = handler;
1759
1760 /*
1761 * Is there already some content down the pipe to convert ?
1762 */
1763 if ((ctxt->input->buf->buffer != NULL) &&
1764 (ctxt->input->buf->buffer->use > 0)) {
1765 int processed;
1766
1767 /*
1768 * Specific handling of the Byte Order Mark for
1769 * UTF-16
1770 */
1771 if ((handler->name != NULL) &&
1772 (!strcmp(handler->name, "UTF-16LE")) &&
1773 (ctxt->input->cur[0] == 0xFF) &&
1774 (ctxt->input->cur[1] == 0xFE)) {
1775 ctxt->input->cur += 2;
1776 }
1777 if ((handler->name != NULL) &&
1778 (!strcmp(handler->name, "UTF-16BE")) &&
1779 (ctxt->input->cur[0] == 0xFE) &&
1780 (ctxt->input->cur[1] == 0xFF)) {
1781 ctxt->input->cur += 2;
1782 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001783 /*
1784 * Errata on XML-1.0 June 20 2001
1785 * Specific handling of the Byte Order Mark for
1786 * UTF-8
1787 */
1788 if ((handler->name != NULL) &&
1789 (!strcmp(handler->name, "UTF-8")) &&
1790 (ctxt->input->cur[0] == 0xEF) &&
1791 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001792 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001793 ctxt->input->cur += 3;
1794 }
Owen Taylor3473f882001-02-23 17:55:21 +00001795
1796 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001797 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001798 * Move it as the raw buffer and create a new input buffer
1799 */
1800 processed = ctxt->input->cur - ctxt->input->base;
1801 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1802 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1803 ctxt->input->buf->buffer = xmlBufferCreate();
1804
1805 if (ctxt->html) {
1806 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001807 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001808 */
1809 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1810 ctxt->input->buf->buffer,
1811 ctxt->input->buf->raw);
1812 } else {
1813 /*
1814 * convert just enough to get
1815 * '<?xml version="1.0" encoding="xxx"?>'
1816 * parsed with the autodetected encoding
1817 * into the parser reading buffer.
1818 */
1819 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1820 ctxt->input->buf->buffer,
1821 ctxt->input->buf->raw);
1822 }
1823 if (nbchars < 0) {
1824 xmlGenericError(xmlGenericErrorContext,
1825 "xmlSwitchToEncoding: encoder error\n");
1826 return(-1);
1827 }
1828 ctxt->input->base =
1829 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001830 ctxt->input->end =
1831 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001832
1833 }
1834 return(0);
1835 } else {
1836 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1837 /*
1838 * When parsing a static memory array one must know the
1839 * size to be able to convert the buffer.
1840 */
1841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001843 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001844 return(-1);
1845 } else {
1846 int processed;
1847
1848 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001849 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001850 * Move it as the raw buffer and create a new input buffer
1851 */
1852 processed = ctxt->input->cur - ctxt->input->base;
1853
1854 ctxt->input->buf->raw = xmlBufferCreate();
1855 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1856 ctxt->input->length - processed);
1857 ctxt->input->buf->buffer = xmlBufferCreate();
1858
1859 /*
1860 * convert as much as possible of the raw input
1861 * to the parser reading buffer.
1862 */
1863 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1864 ctxt->input->buf->buffer,
1865 ctxt->input->buf->raw);
1866 if (nbchars < 0) {
1867 xmlGenericError(xmlGenericErrorContext,
1868 "xmlSwitchToEncoding: encoder error\n");
1869 return(-1);
1870 }
1871
1872 /*
1873 * Conversion succeeded, get rid of the old buffer
1874 */
1875 if ((ctxt->input->free != NULL) &&
1876 (ctxt->input->base != NULL))
1877 ctxt->input->free((xmlChar *) ctxt->input->base);
1878 ctxt->input->base =
1879 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001880 ctxt->input->end =
1881 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001882 }
1883 }
1884 } else {
1885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1886 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001887 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001888 return(-1);
1889 }
1890 /*
1891 * The parsing is now done in UTF8 natively
1892 */
1893 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1894 } else
1895 return(-1);
1896 return(0);
1897
1898}
1899
1900/************************************************************************
1901 * *
1902 * Commodity functions to handle entities processing *
1903 * *
1904 ************************************************************************/
1905
1906/**
1907 * xmlFreeInputStream:
1908 * @input: an xmlParserInputPtr
1909 *
1910 * Free up an input stream.
1911 */
1912void
1913xmlFreeInputStream(xmlParserInputPtr input) {
1914 if (input == NULL) return;
1915
1916 if (input->filename != NULL) xmlFree((char *) input->filename);
1917 if (input->directory != NULL) xmlFree((char *) input->directory);
1918 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1919 if (input->version != NULL) xmlFree((char *) input->version);
1920 if ((input->free != NULL) && (input->base != NULL))
1921 input->free((xmlChar *) input->base);
1922 if (input->buf != NULL)
1923 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001924 xmlFree(input);
1925}
1926
1927/**
1928 * xmlNewInputStream:
1929 * @ctxt: an XML parser context
1930 *
1931 * Create a new input stream structure
1932 * Returns the new input stream or NULL
1933 */
1934xmlParserInputPtr
1935xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1936 xmlParserInputPtr input;
1937
1938 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1939 if (input == NULL) {
1940 if (ctxt != NULL) {
1941 ctxt->errNo = XML_ERR_NO_MEMORY;
1942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1943 ctxt->sax->error(ctxt->userData,
1944 "malloc: couldn't allocate a new input stream\n");
1945 ctxt->errNo = XML_ERR_NO_MEMORY;
1946 }
1947 return(NULL);
1948 }
1949 memset(input, 0, sizeof(xmlParserInput));
1950 input->line = 1;
1951 input->col = 1;
1952 input->standalone = -1;
1953 return(input);
1954}
1955
1956/**
1957 * xmlNewIOInputStream:
1958 * @ctxt: an XML parser context
1959 * @input: an I/O Input
1960 * @enc: the charset encoding if known
1961 *
1962 * Create a new input stream structure encapsulating the @input into
1963 * a stream suitable for the parser.
1964 *
1965 * Returns the new input stream or NULL
1966 */
1967xmlParserInputPtr
1968xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1969 xmlCharEncoding enc) {
1970 xmlParserInputPtr inputStream;
1971
1972 if (xmlParserDebugEntities)
1973 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1974 inputStream = xmlNewInputStream(ctxt);
1975 if (inputStream == NULL) {
1976 return(NULL);
1977 }
1978 inputStream->filename = NULL;
1979 inputStream->buf = input;
1980 inputStream->base = inputStream->buf->buffer->content;
1981 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001982 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001983 if (enc != XML_CHAR_ENCODING_NONE) {
1984 xmlSwitchEncoding(ctxt, enc);
1985 }
1986
1987 return(inputStream);
1988}
1989
1990/**
1991 * xmlNewEntityInputStream:
1992 * @ctxt: an XML parser context
1993 * @entity: an Entity pointer
1994 *
1995 * Create a new input stream based on an xmlEntityPtr
1996 *
1997 * Returns the new input stream or NULL
1998 */
1999xmlParserInputPtr
2000xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2001 xmlParserInputPtr input;
2002
2003 if (entity == NULL) {
2004 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "internal: xmlNewEntityInputStream entity = NULL\n");
2008 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2009 return(NULL);
2010 }
2011 if (xmlParserDebugEntities)
2012 xmlGenericError(xmlGenericErrorContext,
2013 "new input from entity: %s\n", entity->name);
2014 if (entity->content == NULL) {
2015 switch (entity->etype) {
2016 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2017 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2019 ctxt->sax->error(ctxt->userData,
2020 "xmlNewEntityInputStream unparsed entity !\n");
2021 break;
2022 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2023 case XML_EXTERNAL_PARAMETER_ENTITY:
2024 return(xmlLoadExternalEntity((char *) entity->URI,
2025 (char *) entity->ExternalID, ctxt));
2026 case XML_INTERNAL_GENERAL_ENTITY:
2027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2028 ctxt->sax->error(ctxt->userData,
2029 "Internal entity %s without content !\n", entity->name);
2030 break;
2031 case XML_INTERNAL_PARAMETER_ENTITY:
2032 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2034 ctxt->sax->error(ctxt->userData,
2035 "Internal parameter entity %s without content !\n", entity->name);
2036 break;
2037 case XML_INTERNAL_PREDEFINED_ENTITY:
2038 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2040 ctxt->sax->error(ctxt->userData,
2041 "Predefined entity %s without content !\n", entity->name);
2042 break;
2043 }
2044 return(NULL);
2045 }
2046 input = xmlNewInputStream(ctxt);
2047 if (input == NULL) {
2048 return(NULL);
2049 }
2050 input->filename = (char *) entity->URI;
2051 input->base = entity->content;
2052 input->cur = entity->content;
2053 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002054 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002055 return(input);
2056}
2057
2058/**
2059 * xmlNewStringInputStream:
2060 * @ctxt: an XML parser context
2061 * @buffer: an memory buffer
2062 *
2063 * Create a new input stream based on a memory buffer.
2064 * Returns the new input stream
2065 */
2066xmlParserInputPtr
2067xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2068 xmlParserInputPtr input;
2069
2070 if (buffer == NULL) {
2071 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt->userData,
2074 "internal: xmlNewStringInputStream string = NULL\n");
2075 return(NULL);
2076 }
2077 if (xmlParserDebugEntities)
2078 xmlGenericError(xmlGenericErrorContext,
2079 "new fixed input: %.30s\n", buffer);
2080 input = xmlNewInputStream(ctxt);
2081 if (input == NULL) {
2082 return(NULL);
2083 }
2084 input->base = buffer;
2085 input->cur = buffer;
2086 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002087 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002088 return(input);
2089}
2090
2091/**
2092 * xmlNewInputFromFile:
2093 * @ctxt: an XML parser context
2094 * @filename: the filename to use as entity
2095 *
2096 * Create a new input stream based on a file.
2097 *
2098 * Returns the new input stream or NULL in case of error
2099 */
2100xmlParserInputPtr
2101xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2102 xmlParserInputBufferPtr buf;
2103 xmlParserInputPtr inputStream;
2104 char *directory = NULL;
2105 xmlChar *URI = NULL;
2106
2107 if (xmlParserDebugEntities)
2108 xmlGenericError(xmlGenericErrorContext,
2109 "new input from file: %s\n", filename);
2110 if (ctxt == NULL) return(NULL);
2111 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2112 if (buf == NULL)
2113 return(NULL);
2114
2115 URI = xmlStrdup((xmlChar *) filename);
2116 directory = xmlParserGetDirectory((const char *) URI);
2117
2118 inputStream = xmlNewInputStream(ctxt);
2119 if (inputStream == NULL) {
2120 if (directory != NULL) xmlFree((char *) directory);
2121 if (URI != NULL) xmlFree((char *) URI);
2122 return(NULL);
2123 }
2124
2125 inputStream->filename = (const char *) URI;
2126 inputStream->directory = directory;
2127 inputStream->buf = buf;
2128
2129 inputStream->base = inputStream->buf->buffer->content;
2130 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002131 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002132 if ((ctxt->directory == NULL) && (directory != NULL))
2133 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2134 return(inputStream);
2135}
2136
2137/************************************************************************
2138 * *
2139 * Commodity functions to handle parser contexts *
2140 * *
2141 ************************************************************************/
2142
2143/**
2144 * xmlInitParserCtxt:
2145 * @ctxt: an XML parser context
2146 *
2147 * Initialize a parser context
2148 */
2149
2150void
2151xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2152{
2153 xmlSAXHandler *sax;
2154
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002155 if(ctxt==NULL) {
2156 xmlGenericError(xmlGenericErrorContext,
2157 "xmlInitParserCtxt: NULL context given\n");
2158 return;
2159 }
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161 xmlDefaultSAXHandlerInit();
2162
2163 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2164 if (sax == NULL) {
2165 xmlGenericError(xmlGenericErrorContext,
2166 "xmlInitParserCtxt: out of memory\n");
2167 }
2168 else
2169 memset(sax, 0, sizeof(xmlSAXHandler));
2170
2171 /* Allocate the Input stack */
2172 ctxt->inputTab = (xmlParserInputPtr *)
2173 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2174 if (ctxt->inputTab == NULL) {
2175 xmlGenericError(xmlGenericErrorContext,
2176 "xmlInitParserCtxt: out of memory\n");
2177 ctxt->inputNr = 0;
2178 ctxt->inputMax = 0;
2179 ctxt->input = NULL;
2180 return;
2181 }
2182 ctxt->inputNr = 0;
2183 ctxt->inputMax = 5;
2184 ctxt->input = NULL;
2185
2186 ctxt->version = NULL;
2187 ctxt->encoding = NULL;
2188 ctxt->standalone = -1;
2189 ctxt->hasExternalSubset = 0;
2190 ctxt->hasPErefs = 0;
2191 ctxt->html = 0;
2192 ctxt->external = 0;
2193 ctxt->instate = XML_PARSER_START;
2194 ctxt->token = 0;
2195 ctxt->directory = NULL;
2196
2197 /* Allocate the Node stack */
2198 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2199 if (ctxt->nodeTab == NULL) {
2200 xmlGenericError(xmlGenericErrorContext,
2201 "xmlInitParserCtxt: out of memory\n");
2202 ctxt->nodeNr = 0;
2203 ctxt->nodeMax = 0;
2204 ctxt->node = NULL;
2205 ctxt->inputNr = 0;
2206 ctxt->inputMax = 0;
2207 ctxt->input = NULL;
2208 return;
2209 }
2210 ctxt->nodeNr = 0;
2211 ctxt->nodeMax = 10;
2212 ctxt->node = NULL;
2213
2214 /* Allocate the Name stack */
2215 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2216 if (ctxt->nameTab == NULL) {
2217 xmlGenericError(xmlGenericErrorContext,
2218 "xmlInitParserCtxt: out of memory\n");
2219 ctxt->nodeNr = 0;
2220 ctxt->nodeMax = 0;
2221 ctxt->node = NULL;
2222 ctxt->inputNr = 0;
2223 ctxt->inputMax = 0;
2224 ctxt->input = NULL;
2225 ctxt->nameNr = 0;
2226 ctxt->nameMax = 0;
2227 ctxt->name = NULL;
2228 return;
2229 }
2230 ctxt->nameNr = 0;
2231 ctxt->nameMax = 10;
2232 ctxt->name = NULL;
2233
2234 /* Allocate the space stack */
2235 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2236 if (ctxt->spaceTab == NULL) {
2237 xmlGenericError(xmlGenericErrorContext,
2238 "xmlInitParserCtxt: out of memory\n");
2239 ctxt->nodeNr = 0;
2240 ctxt->nodeMax = 0;
2241 ctxt->node = NULL;
2242 ctxt->inputNr = 0;
2243 ctxt->inputMax = 0;
2244 ctxt->input = NULL;
2245 ctxt->nameNr = 0;
2246 ctxt->nameMax = 0;
2247 ctxt->name = NULL;
2248 ctxt->spaceNr = 0;
2249 ctxt->spaceMax = 0;
2250 ctxt->space = NULL;
2251 return;
2252 }
2253 ctxt->spaceNr = 1;
2254 ctxt->spaceMax = 10;
2255 ctxt->spaceTab[0] = -1;
2256 ctxt->space = &ctxt->spaceTab[0];
2257
Daniel Veillard14be0a12001-03-03 18:50:55 +00002258 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002259 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002260
Owen Taylor3473f882001-02-23 17:55:21 +00002261 ctxt->userData = ctxt;
2262 ctxt->myDoc = NULL;
2263 ctxt->wellFormed = 1;
2264 ctxt->valid = 1;
2265 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2266 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2267 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002268 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002269 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002270 if (ctxt->keepBlanks == 0)
2271 sax->ignorableWhitespace = ignorableWhitespace;
2272
Owen Taylor3473f882001-02-23 17:55:21 +00002273 ctxt->vctxt.userData = ctxt;
Daniel Veillard4e1b26c2002-02-03 20:13:06 +00002274 ctxt->vctxt.error = xmlParserValidityError;
2275 ctxt->vctxt.warning = xmlParserValidityWarning;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 if (ctxt->validate) {
Owen Taylor3473f882001-02-23 17:55:21 +00002277 if (xmlGetWarningsDefaultValue == 0)
2278 ctxt->vctxt.warning = NULL;
2279 else
2280 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002281 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002282 }
2283 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2284 ctxt->record_info = 0;
2285 ctxt->nbChars = 0;
2286 ctxt->checkIndex = 0;
2287 ctxt->inSubset = 0;
2288 ctxt->errNo = XML_ERR_OK;
2289 ctxt->depth = 0;
2290 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002291 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002292 xmlInitNodeInfoSeq(&ctxt->node_seq);
2293}
2294
2295/**
2296 * xmlFreeParserCtxt:
2297 * @ctxt: an XML parser context
2298 *
2299 * Free all the memory used by a parser context. However the parsed
2300 * document in ctxt->myDoc is not freed.
2301 */
2302
2303void
2304xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2305{
2306 xmlParserInputPtr input;
2307 xmlChar *oldname;
2308
2309 if (ctxt == NULL) return;
2310
2311 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2312 xmlFreeInputStream(input);
2313 }
2314 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2315 xmlFree(oldname);
2316 }
2317 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2318 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2319 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2320 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2321 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2322 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2323 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2324 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2325 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002326 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2327 xmlFree(ctxt->sax);
2328 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002329 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002330#ifdef LIBXML_CATALOG_ENABLED
2331 if (ctxt->catalogs != NULL)
2332 xmlCatalogFreeLocal(ctxt->catalogs);
2333#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002334 xmlFree(ctxt);
2335}
2336
2337/**
2338 * xmlNewParserCtxt:
2339 *
2340 * Allocate and initialize a new parser context.
2341 *
2342 * Returns the xmlParserCtxtPtr or NULL
2343 */
2344
2345xmlParserCtxtPtr
2346xmlNewParserCtxt()
2347{
2348 xmlParserCtxtPtr ctxt;
2349
2350 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2351 if (ctxt == NULL) {
2352 xmlGenericError(xmlGenericErrorContext,
2353 "xmlNewParserCtxt : cannot allocate context\n");
2354 perror("malloc");
2355 return(NULL);
2356 }
2357 memset(ctxt, 0, sizeof(xmlParserCtxt));
2358 xmlInitParserCtxt(ctxt);
2359 return(ctxt);
2360}
2361
2362/************************************************************************
2363 * *
2364 * Handling of node information *
2365 * *
2366 ************************************************************************/
2367
2368/**
2369 * xmlClearParserCtxt:
2370 * @ctxt: an XML parser context
2371 *
2372 * Clear (release owned resources) and reinitialize a parser context
2373 */
2374
2375void
2376xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2377{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002378 if (ctxt==NULL)
2379 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002380 xmlClearNodeInfoSeq(&ctxt->node_seq);
2381 xmlInitParserCtxt(ctxt);
2382}
2383
2384/**
2385 * xmlParserFindNodeInfo:
2386 * @ctxt: an XML parser context
2387 * @node: an XML node within the tree
2388 *
2389 * Find the parser node info struct for a given node
2390 *
2391 * Returns an xmlParserNodeInfo block pointer or NULL
2392 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002393const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2394 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002395{
2396 unsigned long pos;
2397
2398 /* Find position where node should be at */
2399 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002400 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002401 return &ctx->node_seq.buffer[pos];
2402 else
2403 return NULL;
2404}
2405
2406
2407/**
2408 * xmlInitNodeInfoSeq:
2409 * @seq: a node info sequence pointer
2410 *
2411 * -- Initialize (set to initial state) node info sequence
2412 */
2413void
2414xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2415{
2416 seq->length = 0;
2417 seq->maximum = 0;
2418 seq->buffer = NULL;
2419}
2420
2421/**
2422 * xmlClearNodeInfoSeq:
2423 * @seq: a node info sequence pointer
2424 *
2425 * -- Clear (release memory and reinitialize) node
2426 * info sequence
2427 */
2428void
2429xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2430{
2431 if ( seq->buffer != NULL )
2432 xmlFree(seq->buffer);
2433 xmlInitNodeInfoSeq(seq);
2434}
2435
2436
2437/**
2438 * xmlParserFindNodeInfoIndex:
2439 * @seq: a node info sequence pointer
2440 * @node: an XML node pointer
2441 *
2442 *
2443 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2444 * the given node is or should be at in a sorted sequence
2445 *
2446 * Returns a long indicating the position of the record
2447 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002448unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2449 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002450{
2451 unsigned long upper, lower, middle;
2452 int found = 0;
2453
2454 /* Do a binary search for the key */
2455 lower = 1;
2456 upper = seq->length;
2457 middle = 0;
2458 while ( lower <= upper && !found) {
2459 middle = lower + (upper - lower) / 2;
2460 if ( node == seq->buffer[middle - 1].node )
2461 found = 1;
2462 else if ( node < seq->buffer[middle - 1].node )
2463 upper = middle - 1;
2464 else
2465 lower = middle + 1;
2466 }
2467
2468 /* Return position */
2469 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2470 return middle;
2471 else
2472 return middle - 1;
2473}
2474
2475
2476/**
2477 * xmlParserAddNodeInfo:
2478 * @ctxt: an XML parser context
2479 * @info: a node info sequence pointer
2480 *
2481 * Insert node info record into the sorted sequence
2482 */
2483void
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002484xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002485 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002486{
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002487 unsigned long pos;
Owen Taylor3473f882001-02-23 17:55:21 +00002488
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002489 /* Find pos and check to see if node is already in the sequence */
2490 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2491 info->node);
2492 if (pos < ctxt->node_seq.length
2493 && ctxt->node_seq.buffer[pos].node == info->node) {
2494 ctxt->node_seq.buffer[pos] = *info;
Owen Taylor3473f882001-02-23 17:55:21 +00002495 }
2496
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002497 /* Otherwise, we need to add new node to buffer */
2498 else {
2499 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2500 xmlParserNodeInfo *tmp_buffer;
2501 unsigned int byte_size;
Owen Taylor3473f882001-02-23 17:55:21 +00002502
Daniel Veillardc8c7be42002-01-23 17:53:44 +00002503 if (ctxt->node_seq.maximum == 0)
2504 ctxt->node_seq.maximum = 2;
2505 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2506 (2 * ctxt->node_seq.maximum));
2507
2508 if (ctxt->node_seq.buffer == NULL)
2509 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2510 else
2511 tmp_buffer =
2512 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2513 byte_size);
2514
2515 if (tmp_buffer == NULL) {
2516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2517 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2518 ctxt->errNo = XML_ERR_NO_MEMORY;
2519 return;
2520 }
2521 ctxt->node_seq.buffer = tmp_buffer;
2522 ctxt->node_seq.maximum *= 2;
2523 }
2524
2525 /* If position is not at end, move elements out of the way */
2526 if (pos != ctxt->node_seq.length) {
2527 unsigned long i;
2528
2529 for (i = ctxt->node_seq.length; i > pos; i--)
2530 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2531 }
2532
2533 /* Copy element and increase length */
2534 ctxt->node_seq.buffer[pos] = *info;
2535 ctxt->node_seq.length++;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 }
Owen Taylor3473f882001-02-23 17:55:21 +00002537}
2538
2539/************************************************************************
2540 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002541 * Defaults settings *
2542 * *
2543 ************************************************************************/
2544/**
2545 * xmlPedanticParserDefault:
2546 * @val: int 0 or 1
2547 *
2548 * Set and return the previous value for enabling pedantic warnings.
2549 *
2550 * Returns the last value for 0 for no substitution, 1 for substitution.
2551 */
2552
2553int
2554xmlPedanticParserDefault(int val) {
2555 int old = xmlPedanticParserDefaultValue;
2556
2557 xmlPedanticParserDefaultValue = val;
2558 return(old);
2559}
2560
2561/**
2562 * xmlLineNumbersDefault:
2563 * @val: int 0 or 1
2564 *
2565 * Set and return the previous value for enabling line numbers in elements
2566 * contents. This may break on old application and is turned off by default.
2567 *
2568 * Returns the last value for 0 for no substitution, 1 for substitution.
2569 */
2570
2571int
2572xmlLineNumbersDefault(int val) {
2573 int old = xmlLineNumbersDefaultValue;
2574
2575 xmlLineNumbersDefaultValue = val;
2576 return(old);
2577}
2578
2579/**
2580 * xmlSubstituteEntitiesDefault:
2581 * @val: int 0 or 1
2582 *
2583 * Set and return the previous value for default entity support.
2584 * Initially the parser always keep entity references instead of substituting
2585 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002586 * default parser behavior
2587 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002588 * file basis.
2589 *
2590 * Returns the last value for 0 for no substitution, 1 for substitution.
2591 */
2592
2593int
2594xmlSubstituteEntitiesDefault(int val) {
2595 int old = xmlSubstituteEntitiesDefaultValue;
2596
2597 xmlSubstituteEntitiesDefaultValue = val;
2598 return(old);
2599}
2600
2601/**
2602 * xmlKeepBlanksDefault:
2603 * @val: int 0 or 1
2604 *
2605 * Set and return the previous value for default blanks text nodes support.
2606 * The 1.x version of the parser used an heuristic to try to detect
2607 * ignorable white spaces. As a result the SAX callback was generating
2608 * ignorableWhitespace() callbacks instead of characters() one, and when
2609 * using the DOM output text nodes containing those blanks were not generated.
2610 * The 2.x and later version will switch to the XML standard way and
2611 * ignorableWhitespace() are only generated when running the parser in
2612 * validating mode and when the current element doesn't allow CDATA or
2613 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002614 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002615 * on 1.X libs and to switch back to the old mode for compatibility when
2616 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2617 * by using xmlIsBlankNode() commodity function to detect the "empty"
2618 * nodes generated.
2619 * This value also affect autogeneration of indentation when saving code
2620 * if blanks sections are kept, indentation is not generated.
2621 *
2622 * Returns the last value for 0 for no substitution, 1 for substitution.
2623 */
2624
2625int
2626xmlKeepBlanksDefault(int val) {
2627 int old = xmlKeepBlanksDefaultValue;
2628
2629 xmlKeepBlanksDefaultValue = val;
2630 xmlIndentTreeOutput = !val;
2631 return(old);
2632}
2633
2634/************************************************************************
2635 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002636 * Deprecated functions kept for compatibility *
2637 * *
2638 ************************************************************************/
2639
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002640/**
2641 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002642 * @lang: pointer to the string value
2643 *
2644 * Checks that the value conforms to the LanguageID production:
2645 *
2646 * NOTE: this is somewhat deprecated, those productions were removed from
2647 * the XML Second edition.
2648 *
2649 * [33] LanguageID ::= Langcode ('-' Subcode)*
2650 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2651 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2652 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2653 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2654 * [38] Subcode ::= ([a-z] | [A-Z])+
2655 *
2656 * Returns 1 if correct 0 otherwise
2657 **/
2658int
2659xmlCheckLanguageID(const xmlChar *lang) {
2660 const xmlChar *cur = lang;
2661
2662 if (cur == NULL)
2663 return(0);
2664 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2665 ((cur[0] == 'I') && (cur[1] == '-'))) {
2666 /*
2667 * IANA code
2668 */
2669 cur += 2;
2670 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2671 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2672 cur++;
2673 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2674 ((cur[0] == 'X') && (cur[1] == '-'))) {
2675 /*
2676 * User code
2677 */
2678 cur += 2;
2679 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2680 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2681 cur++;
2682 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2683 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2684 /*
2685 * ISO639
2686 */
2687 cur++;
2688 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2689 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2690 cur++;
2691 else
2692 return(0);
2693 } else
2694 return(0);
2695 while (cur[0] != 0) { /* non input consuming */
2696 if (cur[0] != '-')
2697 return(0);
2698 cur++;
2699 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2700 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2701 cur++;
2702 else
2703 return(0);
2704 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2705 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2706 cur++;
2707 }
2708 return(1);
2709}
2710
2711/**
2712 * xmlDecodeEntities:
2713 * @ctxt: the parser context
2714 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2715 * @len: the len to decode (in bytes !), -1 for no size limit
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2719 *
2720 * This function is deprecated, we now always process entities content
2721 * through xmlStringDecodeEntities
2722 *
2723 * TODO: remove it in next major release.
2724 *
2725 * [67] Reference ::= EntityRef | CharRef
2726 *
2727 * [69] PEReference ::= '%' Name ';'
2728 *
2729 * Returns A newly allocated string with the substitution done. The caller
2730 * must deallocate it !
2731 */
2732xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002733xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2734 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002735#if 0
2736 xmlChar *buffer = NULL;
2737 unsigned int buffer_size = 0;
2738 unsigned int nbchars = 0;
2739
2740 xmlChar *current = NULL;
2741 xmlEntityPtr ent;
2742 unsigned int max = (unsigned int) len;
2743 int c,l;
2744#endif
2745
2746 static int deprecated = 0;
2747 if (!deprecated) {
2748 xmlGenericError(xmlGenericErrorContext,
2749 "xmlDecodeEntities() deprecated function reached\n");
2750 deprecated = 1;
2751 }
2752
2753#if 0
2754 if (ctxt->depth > 40) {
2755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2756 ctxt->sax->error(ctxt->userData,
2757 "Detected entity reference loop\n");
2758 ctxt->wellFormed = 0;
2759 ctxt->disableSAX = 1;
2760 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2761 return(NULL);
2762 }
2763
2764 /*
2765 * allocate a translation buffer.
2766 */
2767 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2768 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2769 if (buffer == NULL) {
2770 perror("xmlDecodeEntities: malloc failed");
2771 return(NULL);
2772 }
2773
2774 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002775 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002776 */
2777 GROW;
2778 c = CUR_CHAR(l);
2779 while ((nbchars < max) && (c != end) && /* NOTUSED */
2780 (c != end2) && (c != end3)) {
2781 GROW;
2782 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002783 if ((c == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002784 int val = xmlParseCharRef(ctxt);
2785 COPY_BUF(0,buffer,nbchars,val);
2786 NEXTL(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002787 } else if (c == '&') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002788 (what & XML_SUBSTITUTE_REF)) {
2789 if (xmlParserDebugEntities)
2790 xmlGenericError(xmlGenericErrorContext,
2791 "decoding Entity Reference\n");
2792 ent = xmlParseEntityRef(ctxt);
2793 if ((ent != NULL) &&
2794 (ctxt->replaceEntities != 0)) {
2795 current = ent->content;
2796 while (*current != 0) { /* non input consuming loop */
2797 buffer[nbchars++] = *current++;
2798 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2799 growBuffer(buffer);
2800 }
2801 }
2802 } else if (ent != NULL) {
2803 const xmlChar *cur = ent->name;
2804
2805 buffer[nbchars++] = '&';
2806 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2807 growBuffer(buffer);
2808 }
2809 while (*cur != 0) { /* non input consuming loop */
2810 buffer[nbchars++] = *cur++;
2811 }
2812 buffer[nbchars++] = ';';
2813 }
2814 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2815 /*
2816 * a PEReference induce to switch the entity flow,
2817 * we break here to flush the current set of chars
2818 * parsed if any. We will be called back later.
2819 */
2820 if (xmlParserDebugEntities)
2821 xmlGenericError(xmlGenericErrorContext,
2822 "decoding PE Reference\n");
2823 if (nbchars != 0) break;
2824
2825 xmlParsePEReference(ctxt);
2826
2827 /*
2828 * Pop-up of finished entities.
2829 */
2830 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2831 xmlPopInput(ctxt);
2832
2833 break;
2834 } else {
2835 COPY_BUF(l,buffer,nbchars,c);
2836 NEXTL(l);
2837 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2838 growBuffer(buffer);
2839 }
2840 }
2841 c = CUR_CHAR(l);
2842 }
2843 buffer[nbchars++] = 0;
2844 return(buffer);
2845#endif
2846 return(NULL);
2847}
2848
2849/**
2850 * xmlNamespaceParseNCName:
2851 * @ctxt: an XML parser context
2852 *
2853 * parse an XML namespace name.
2854 *
2855 * TODO: this seems not in use anymore, the namespace handling is done on
2856 * top of the SAX interfaces, i.e. not on raw input.
2857 *
2858 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2859 *
2860 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2861 * CombiningChar | Extender
2862 *
2863 * Returns the namespace name or NULL
2864 */
2865
2866xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002867xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002868#if 0
2869 xmlChar buf[XML_MAX_NAMELEN + 5];
2870 int len = 0, l;
2871 int cur = CUR_CHAR(l);
2872#endif
2873
2874 static int deprecated = 0;
2875 if (!deprecated) {
2876 xmlGenericError(xmlGenericErrorContext,
2877 "xmlNamespaceParseNCName() deprecated function reached\n");
2878 deprecated = 1;
2879 }
2880
2881#if 0
2882 /* load first the value of the char !!! */
2883 GROW;
2884 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2885
2886xmlGenericError(xmlGenericErrorContext,
2887 "xmlNamespaceParseNCName: reached loop 3\n");
2888 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2889 (cur == '.') || (cur == '-') ||
2890 (cur == '_') ||
2891 (IS_COMBINING(cur)) ||
2892 (IS_EXTENDER(cur))) {
2893 COPY_BUF(l,buf,len,cur);
2894 NEXTL(l);
2895 cur = CUR_CHAR(l);
2896 if (len >= XML_MAX_NAMELEN) {
2897 xmlGenericError(xmlGenericErrorContext,
2898 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2899 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2900 (cur == '.') || (cur == '-') ||
2901 (cur == '_') ||
2902 (IS_COMBINING(cur)) ||
2903 (IS_EXTENDER(cur))) {
2904 NEXTL(l);
2905 cur = CUR_CHAR(l);
2906 }
2907 break;
2908 }
2909 }
2910 return(xmlStrndup(buf, len));
2911#endif
2912 return(NULL);
2913}
2914
2915/**
2916 * xmlNamespaceParseQName:
2917 * @ctxt: an XML parser context
2918 * @prefix: a xmlChar **
2919 *
2920 * TODO: this seems not in use anymore, the namespace handling is done on
2921 * top of the SAX interfaces, i.e. not on raw input.
2922 *
2923 * parse an XML qualified name
2924 *
2925 * [NS 5] QName ::= (Prefix ':')? LocalPart
2926 *
2927 * [NS 6] Prefix ::= NCName
2928 *
2929 * [NS 7] LocalPart ::= NCName
2930 *
2931 * Returns the local part, and prefix is updated
2932 * to get the Prefix if any.
2933 */
2934
2935xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002936xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002937
2938 static int deprecated = 0;
2939 if (!deprecated) {
2940 xmlGenericError(xmlGenericErrorContext,
2941 "xmlNamespaceParseQName() deprecated function reached\n");
2942 deprecated = 1;
2943 }
2944
2945#if 0
2946 xmlChar *ret = NULL;
2947
2948 *prefix = NULL;
2949 ret = xmlNamespaceParseNCName(ctxt);
2950 if (RAW == ':') {
2951 *prefix = ret;
2952 NEXT;
2953 ret = xmlNamespaceParseNCName(ctxt);
2954 }
2955
2956 return(ret);
2957#endif
2958 return(NULL);
2959}
2960
2961/**
2962 * xmlNamespaceParseNSDef:
2963 * @ctxt: an XML parser context
2964 *
2965 * parse a namespace prefix declaration
2966 *
2967 * TODO: this seems not in use anymore, the namespace handling is done on
2968 * top of the SAX interfaces, i.e. not on raw input.
2969 *
2970 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2971 *
2972 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2973 *
2974 * Returns the namespace name
2975 */
2976
2977xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002978xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002979 static int deprecated = 0;
2980 if (!deprecated) {
2981 xmlGenericError(xmlGenericErrorContext,
2982 "xmlNamespaceParseNSDef() deprecated function reached\n");
2983 deprecated = 1;
2984 }
2985 return(NULL);
2986#if 0
2987 xmlChar *name = NULL;
2988
2989 if ((RAW == 'x') && (NXT(1) == 'm') &&
2990 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2991 (NXT(4) == 's')) {
2992 SKIP(5);
2993 if (RAW == ':') {
2994 NEXT;
2995 name = xmlNamespaceParseNCName(ctxt);
2996 }
2997 }
2998 return(name);
2999#endif
3000}
3001
3002/**
3003 * xmlParseQuotedString:
3004 * @ctxt: an XML parser context
3005 *
3006 * Parse and return a string between quotes or doublequotes
3007 *
3008 * TODO: Deprecated, to be removed at next drop of binary compatibility
3009 *
3010 * Returns the string parser or NULL.
3011 */
3012xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003013xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003014 static int deprecated = 0;
3015 if (!deprecated) {
3016 xmlGenericError(xmlGenericErrorContext,
3017 "xmlParseQuotedString() deprecated function reached\n");
3018 deprecated = 1;
3019 }
3020 return(NULL);
3021
3022#if 0
3023 xmlChar *buf = NULL;
3024 int len = 0,l;
3025 int size = XML_PARSER_BUFFER_SIZE;
3026 int c;
3027
3028 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3029 if (buf == NULL) {
3030 xmlGenericError(xmlGenericErrorContext,
3031 "malloc of %d byte failed\n", size);
3032 return(NULL);
3033 }
3034xmlGenericError(xmlGenericErrorContext,
3035 "xmlParseQuotedString: reached loop 4\n");
3036 if (RAW == '"') {
3037 NEXT;
3038 c = CUR_CHAR(l);
3039 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3040 if (len + 5 >= size) {
3041 size *= 2;
3042 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3043 if (buf == NULL) {
3044 xmlGenericError(xmlGenericErrorContext,
3045 "realloc of %d byte failed\n", size);
3046 return(NULL);
3047 }
3048 }
3049 COPY_BUF(l,buf,len,c);
3050 NEXTL(l);
3051 c = CUR_CHAR(l);
3052 }
3053 if (c != '"') {
3054 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3056 ctxt->sax->error(ctxt->userData,
3057 "String not closed \"%.50s\"\n", buf);
3058 ctxt->wellFormed = 0;
3059 ctxt->disableSAX = 1;
3060 } else {
3061 NEXT;
3062 }
3063 } else if (RAW == '\''){
3064 NEXT;
3065 c = CUR;
3066 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3067 if (len + 1 >= size) {
3068 size *= 2;
3069 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3070 if (buf == NULL) {
3071 xmlGenericError(xmlGenericErrorContext,
3072 "realloc of %d byte failed\n", size);
3073 return(NULL);
3074 }
3075 }
3076 buf[len++] = c;
3077 NEXT;
3078 c = CUR;
3079 }
3080 if (RAW != '\'') {
3081 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3083 ctxt->sax->error(ctxt->userData,
3084 "String not closed \"%.50s\"\n", buf);
3085 ctxt->wellFormed = 0;
3086 ctxt->disableSAX = 1;
3087 } else {
3088 NEXT;
3089 }
3090 }
3091 return(buf);
3092#endif
3093}
3094
3095/**
3096 * xmlParseNamespace:
3097 * @ctxt: an XML parser context
3098 *
3099 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3100 *
3101 * This is what the older xml-name Working Draft specified, a bunch of
3102 * other stuff may still rely on it, so support is still here as
3103 * if it was declared on the root of the Tree:-(
3104 *
3105 * TODO: remove from library
3106 *
3107 * To be removed at next drop of binary compatibility
3108 */
3109
3110void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003111xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003112 static int deprecated = 0;
3113 if (!deprecated) {
3114 xmlGenericError(xmlGenericErrorContext,
3115 "xmlParseNamespace() deprecated function reached\n");
3116 deprecated = 1;
3117 }
3118
3119#if 0
3120 xmlChar *href = NULL;
3121 xmlChar *prefix = NULL;
3122 int garbage = 0;
3123
3124 /*
3125 * We just skipped "namespace" or "xml:namespace"
3126 */
3127 SKIP_BLANKS;
3128
3129xmlGenericError(xmlGenericErrorContext,
3130 "xmlParseNamespace: reached loop 5\n");
3131 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3132 /*
3133 * We can have "ns" or "prefix" attributes
3134 * Old encoding as 'href' or 'AS' attributes is still supported
3135 */
3136 if ((RAW == 'n') && (NXT(1) == 's')) {
3137 garbage = 0;
3138 SKIP(2);
3139 SKIP_BLANKS;
3140
3141 if (RAW != '=') continue;
3142 NEXT;
3143 SKIP_BLANKS;
3144
3145 href = xmlParseQuotedString(ctxt);
3146 SKIP_BLANKS;
3147 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3148 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3149 garbage = 0;
3150 SKIP(4);
3151 SKIP_BLANKS;
3152
3153 if (RAW != '=') continue;
3154 NEXT;
3155 SKIP_BLANKS;
3156
3157 href = xmlParseQuotedString(ctxt);
3158 SKIP_BLANKS;
3159 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3160 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3161 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3162 garbage = 0;
3163 SKIP(6);
3164 SKIP_BLANKS;
3165
3166 if (RAW != '=') continue;
3167 NEXT;
3168 SKIP_BLANKS;
3169
3170 prefix = xmlParseQuotedString(ctxt);
3171 SKIP_BLANKS;
3172 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3173 garbage = 0;
3174 SKIP(2);
3175 SKIP_BLANKS;
3176
3177 if (RAW != '=') continue;
3178 NEXT;
3179 SKIP_BLANKS;
3180
3181 prefix = xmlParseQuotedString(ctxt);
3182 SKIP_BLANKS;
3183 } else if ((RAW == '?') && (NXT(1) == '>')) {
3184 garbage = 0;
3185 NEXT;
3186 } else {
3187 /*
3188 * Found garbage when parsing the namespace
3189 */
3190 if (!garbage) {
3191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3192 ctxt->sax->error(ctxt->userData,
3193 "xmlParseNamespace found garbage\n");
3194 }
3195 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3196 ctxt->wellFormed = 0;
3197 ctxt->disableSAX = 1;
3198 NEXT;
3199 }
3200 }
3201
3202 MOVETO_ENDTAG(CUR_PTR);
3203 NEXT;
3204
3205 /*
3206 * Register the DTD.
3207 if (href != NULL)
3208 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3209 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3210 */
3211
3212 if (prefix != NULL) xmlFree(prefix);
3213 if (href != NULL) xmlFree(href);
3214#endif
3215}
3216
3217/**
3218 * xmlScanName:
3219 * @ctxt: an XML parser context
3220 *
3221 * Trickery: parse an XML name but without consuming the input flow
3222 * Needed for rollback cases. Used only when parsing entities references.
3223 *
3224 * TODO: seems deprecated now, only used in the default part of
3225 * xmlParserHandleReference
3226 *
3227 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3228 * CombiningChar | Extender
3229 *
3230 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3231 *
3232 * [6] Names ::= Name (S Name)*
3233 *
3234 * Returns the Name parsed or NULL
3235 */
3236
3237xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003238xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003239 static int deprecated = 0;
3240 if (!deprecated) {
3241 xmlGenericError(xmlGenericErrorContext,
3242 "xmlScanName() deprecated function reached\n");
3243 deprecated = 1;
3244 }
3245 return(NULL);
3246
3247#if 0
3248 xmlChar buf[XML_MAX_NAMELEN];
3249 int len = 0;
3250
3251 GROW;
3252 if (!IS_LETTER(RAW) && (RAW != '_') &&
3253 (RAW != ':')) {
3254 return(NULL);
3255 }
3256
3257
3258 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3259 (NXT(len) == '.') || (NXT(len) == '-') ||
3260 (NXT(len) == '_') || (NXT(len) == ':') ||
3261 (IS_COMBINING(NXT(len))) ||
3262 (IS_EXTENDER(NXT(len)))) {
3263 GROW;
3264 buf[len] = NXT(len);
3265 len++;
3266 if (len >= XML_MAX_NAMELEN) {
3267 xmlGenericError(xmlGenericErrorContext,
3268 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3269 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3270 (IS_DIGIT(NXT(len))) ||
3271 (NXT(len) == '.') || (NXT(len) == '-') ||
3272 (NXT(len) == '_') || (NXT(len) == ':') ||
3273 (IS_COMBINING(NXT(len))) ||
3274 (IS_EXTENDER(NXT(len))))
3275 len++;
3276 break;
3277 }
3278 }
3279 return(xmlStrndup(buf, len));
3280#endif
3281}
3282
3283/**
3284 * xmlParserHandleReference:
3285 * @ctxt: the parser context
3286 *
3287 * TODO: Remove, now deprecated ... the test is done directly in the
3288 * content parsing
3289 * routines.
3290 *
3291 * [67] Reference ::= EntityRef | CharRef
3292 *
3293 * [68] EntityRef ::= '&' Name ';'
3294 *
3295 * [ WFC: Entity Declared ]
3296 * the Name given in the entity reference must match that in an entity
3297 * declaration, except that well-formed documents need not declare any
3298 * of the following entities: amp, lt, gt, apos, quot.
3299 *
3300 * [ WFC: Parsed Entity ]
3301 * An entity reference must not contain the name of an unparsed entity
3302 *
3303 * [66] CharRef ::= '&#' [0-9]+ ';' |
3304 * '&#x' [0-9a-fA-F]+ ';'
3305 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003306 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003307 * the handling is done accordingly to
3308 * http://www.w3.org/TR/REC-xml#entproc
3309 */
3310void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003311xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003312 static int deprecated = 0;
3313 if (!deprecated) {
3314 xmlGenericError(xmlGenericErrorContext,
3315 "xmlParserHandleReference() deprecated function reached\n");
3316 deprecated = 1;
3317 }
3318
Owen Taylor3473f882001-02-23 17:55:21 +00003319 return;
3320}
3321
3322/**
3323 * xmlHandleEntity:
3324 * @ctxt: an XML parser context
3325 * @entity: an XML entity pointer.
3326 *
3327 * Default handling of defined entities, when should we define a new input
3328 * stream ? When do we just handle that as a set of chars ?
3329 *
3330 * OBSOLETE: to be removed at some point.
3331 */
3332
3333void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003334xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 static int deprecated = 0;
3336 if (!deprecated) {
3337 xmlGenericError(xmlGenericErrorContext,
3338 "xmlHandleEntity() deprecated function reached\n");
3339 deprecated = 1;
3340 }
3341
3342#if 0
3343 int len;
3344 xmlParserInputPtr input;
3345
3346 if (entity->content == NULL) {
3347 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3349 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3350 entity->name);
3351 ctxt->wellFormed = 0;
3352 ctxt->disableSAX = 1;
3353 return;
3354 }
3355 len = xmlStrlen(entity->content);
3356 if (len <= 2) goto handle_as_char;
3357
3358 /*
3359 * Redefine its content as an input stream.
3360 */
3361 input = xmlNewEntityInputStream(ctxt, entity);
3362 xmlPushInput(ctxt, input);
3363 return;
3364
3365handle_as_char:
3366 /*
3367 * Just handle the content as a set of chars.
3368 */
3369 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3370 (ctxt->sax->characters != NULL))
3371 ctxt->sax->characters(ctxt->userData, entity->content, len);
3372#endif
3373}
3374
3375/**
3376 * xmlNewGlobalNs:
3377 * @doc: the document carrying the namespace
3378 * @href: the URI associated
3379 * @prefix: the prefix for the namespace
3380 *
3381 * Creation of a Namespace, the old way using PI and without scoping
3382 * DEPRECATED !!!
3383 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003384 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003385 */
3386xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003387xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3388 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003389 static int deprecated = 0;
3390 if (!deprecated) {
3391 xmlGenericError(xmlGenericErrorContext,
3392 "xmlNewGlobalNs() deprecated function reached\n");
3393 deprecated = 1;
3394 }
3395 return(NULL);
3396#if 0
3397 xmlNodePtr root;
3398
3399 xmlNsPtr cur;
3400
3401 root = xmlDocGetRootElement(doc);
3402 if (root != NULL)
3403 return(xmlNewNs(root, href, prefix));
3404
3405 /*
3406 * if there is no root element yet, create an old Namespace type
3407 * and it will be moved to the root at save time.
3408 */
3409 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3410 if (cur == NULL) {
3411 xmlGenericError(xmlGenericErrorContext,
3412 "xmlNewGlobalNs : malloc failed\n");
3413 return(NULL);
3414 }
3415 memset(cur, 0, sizeof(xmlNs));
3416 cur->type = XML_GLOBAL_NAMESPACE;
3417
3418 if (href != NULL)
3419 cur->href = xmlStrdup(href);
3420 if (prefix != NULL)
3421 cur->prefix = xmlStrdup(prefix);
3422
3423 /*
3424 * Add it at the end to preserve parsing order ...
3425 */
3426 if (doc != NULL) {
3427 if (doc->oldNs == NULL) {
3428 doc->oldNs = cur;
3429 } else {
3430 xmlNsPtr prev = doc->oldNs;
3431
3432 while (prev->next != NULL) prev = prev->next;
3433 prev->next = cur;
3434 }
3435 }
3436
3437 return(NULL);
3438#endif
3439}
3440
3441/**
3442 * xmlUpgradeOldNs:
3443 * @doc: a document pointer
3444 *
3445 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3446 * DEPRECATED
3447 */
3448void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003449xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003450 static int deprecated = 0;
3451 if (!deprecated) {
3452 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003453 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003454 deprecated = 1;
3455 }
3456#if 0
3457 xmlNsPtr cur;
3458
3459 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3460 if (doc->children == NULL) {
3461#ifdef DEBUG_TREE
3462 xmlGenericError(xmlGenericErrorContext,
3463 "xmlUpgradeOldNs: failed no root !\n");
3464#endif
3465 return;
3466 }
3467
3468 cur = doc->oldNs;
3469 while (cur->next != NULL) {
3470 cur->type = XML_LOCAL_NAMESPACE;
3471 cur = cur->next;
3472 }
3473 cur->type = XML_LOCAL_NAMESPACE;
3474 cur->next = doc->children->nsDef;
3475 doc->children->nsDef = doc->oldNs;
3476 doc->oldNs = NULL;
3477#endif
3478}
3479