blob: 9832a52924fbc5e41883c577abf9e4b7040d9bbe [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000053#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000054
Daniel Veillard56a4cb82001-03-24 17:00:36 +000055void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000056
/*
 * Global parsing defaults, VMS builds only.
 *
 * Each default is stored in a variable whose name ends in "Val"; the
 * matching "...Value" identifier is a macro alias for that variable.
 */
#if defined(VMS)
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal

int xmlSubstituteEntitiesDefaultVal = 0;
int xmlDoValidityCheckingDefaultVal = 0;
#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066
Daniel Veillard5e2dace2001-07-18 19:30:27 +000067/**
Owen Taylor3473f882001-02-23 17:55:21 +000068 * xmlCheckVersion:
69 * @version: the include version number
70 *
71 * check the compiled lib version against the include one.
72 * This can warn or immediately kill the application
73 */
74void
75xmlCheckVersion(int version) {
76 int myversion = (int) LIBXML_VERSION;
77
Daniel Veillard6f350292001-10-14 09:56:15 +000078 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000079
Owen Taylor3473f882001-02-23 17:55:21 +000080 if ((myversion / 10000) != (version / 10000)) {
81 xmlGenericError(xmlGenericErrorContext,
82 "Fatal: program compiled against libxml %d using libxml %d\n",
83 (version / 10000), (myversion / 10000));
84 exit(1);
85 }
86 if ((myversion / 100) < (version / 100)) {
87 xmlGenericError(xmlGenericErrorContext,
88 "Warning: program compiled against libxml %d using older %d\n",
89 (version / 100), (myversion / 100));
90 }
91}
92
93
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names actually stored
 * @result:  array receiving pointers to the feature names
 *
 * Copy at most *@len feature names into @result.  The stored pointers
 * refer to static strings and must not be freed.
 *
 * If either @len or @result is NULL nothing is copied and the total
 * number of features is returned, which lets a caller size its array.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the total
 * number of known features.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* never hand out more names than exist; report the clamped count */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
165
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166/**
Owen Taylor3473f882001-02-23 17:55:21 +0000167 * xmlGetFeature:
168 * @ctxt: an XML/HTML parser context
169 * @name: the feature name
170 * @result: location to store the result
171 *
172 * Read the current value of one feature of this parser instance
173 *
174 * Returns -1 in case or error, 0 otherwise
175 */
176int
177xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
178 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
179 return(-1);
180
181 if (!strcmp(name, "validate")) {
182 *((int *) result) = ctxt->validate;
183 } else if (!strcmp(name, "keep blanks")) {
184 *((int *) result) = ctxt->keepBlanks;
185 } else if (!strcmp(name, "disable SAX")) {
186 *((int *) result) = ctxt->disableSAX;
187 } else if (!strcmp(name, "fetch external entities")) {
188 *((int *) result) = ctxt->loadsubset;
189 } else if (!strcmp(name, "substitute entities")) {
190 *((int *) result) = ctxt->replaceEntities;
191 } else if (!strcmp(name, "gather line info")) {
192 *((int *) result) = ctxt->record_info;
193 } else if (!strcmp(name, "user data")) {
194 *((void **)result) = ctxt->userData;
195 } else if (!strcmp(name, "is html")) {
196 *((int *) result) = ctxt->html;
197 } else if (!strcmp(name, "is standalone")) {
198 *((int *) result) = ctxt->standalone;
199 } else if (!strcmp(name, "document")) {
200 *((xmlDocPtr *) result) = ctxt->myDoc;
201 } else if (!strcmp(name, "is well formed")) {
202 *((int *) result) = ctxt->wellFormed;
203 } else if (!strcmp(name, "is valid")) {
204 *((int *) result) = ctxt->valid;
205 } else if (!strcmp(name, "SAX block")) {
206 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
207 } else if (!strcmp(name, "SAX function internalSubset")) {
208 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
209 } else if (!strcmp(name, "SAX function isStandalone")) {
210 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
211 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
212 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
213 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
214 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
215 } else if (!strcmp(name, "SAX function resolveEntity")) {
216 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
217 } else if (!strcmp(name, "SAX function getEntity")) {
218 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
219 } else if (!strcmp(name, "SAX function entityDecl")) {
220 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
221 } else if (!strcmp(name, "SAX function notationDecl")) {
222 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
223 } else if (!strcmp(name, "SAX function attributeDecl")) {
224 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
225 } else if (!strcmp(name, "SAX function elementDecl")) {
226 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
227 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
228 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
229 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
230 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
231 } else if (!strcmp(name, "SAX function startDocument")) {
232 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
233 } else if (!strcmp(name, "SAX function endDocument")) {
234 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
235 } else if (!strcmp(name, "SAX function startElement")) {
236 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
237 } else if (!strcmp(name, "SAX function endElement")) {
238 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
239 } else if (!strcmp(name, "SAX function reference")) {
240 *((referenceSAXFunc *) result) = ctxt->sax->reference;
241 } else if (!strcmp(name, "SAX function characters")) {
242 *((charactersSAXFunc *) result) = ctxt->sax->characters;
243 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
244 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
245 } else if (!strcmp(name, "SAX function processingInstruction")) {
246 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
247 } else if (!strcmp(name, "SAX function comment")) {
248 *((commentSAXFunc *) result) = ctxt->sax->comment;
249 } else if (!strcmp(name, "SAX function warning")) {
250 *((warningSAXFunc *) result) = ctxt->sax->warning;
251 } else if (!strcmp(name, "SAX function error")) {
252 *((errorSAXFunc *) result) = ctxt->sax->error;
253 } else if (!strcmp(name, "SAX function fatalError")) {
254 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
255 } else if (!strcmp(name, "SAX function getParameterEntity")) {
256 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
257 } else if (!strcmp(name, "SAX function cdataBlock")) {
258 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
259 } else if (!strcmp(name, "SAX function externalSubset")) {
260 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
261 } else {
262 return(-1);
263 }
264 return(0);
265}
266
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000267/**
Owen Taylor3473f882001-02-23 17:55:21 +0000268 * xmlSetFeature:
269 * @ctxt: an XML/HTML parser context
270 * @name: the feature name
271 * @value: pointer to the location of the new value
272 *
273 * Change the current value of one feature of this parser instance
274 *
275 * Returns -1 in case or error, 0 otherwise
276 */
277int
278xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
279 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
280 return(-1);
281
282 if (!strcmp(name, "validate")) {
283 int newvalidate = *((int *) value);
284 if ((!ctxt->validate) && (newvalidate != 0)) {
285 if (ctxt->vctxt.warning == NULL)
286 ctxt->vctxt.warning = xmlParserValidityWarning;
287 if (ctxt->vctxt.error == NULL)
288 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000289 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000290 }
291 ctxt->validate = newvalidate;
292 } else if (!strcmp(name, "keep blanks")) {
293 ctxt->keepBlanks = *((int *) value);
294 } else if (!strcmp(name, "disable SAX")) {
295 ctxt->disableSAX = *((int *) value);
296 } else if (!strcmp(name, "fetch external entities")) {
297 ctxt->loadsubset = *((int *) value);
298 } else if (!strcmp(name, "substitute entities")) {
299 ctxt->replaceEntities = *((int *) value);
300 } else if (!strcmp(name, "gather line info")) {
301 ctxt->record_info = *((int *) value);
302 } else if (!strcmp(name, "user data")) {
303 ctxt->userData = *((void **)value);
304 } else if (!strcmp(name, "is html")) {
305 ctxt->html = *((int *) value);
306 } else if (!strcmp(name, "is standalone")) {
307 ctxt->standalone = *((int *) value);
308 } else if (!strcmp(name, "document")) {
309 ctxt->myDoc = *((xmlDocPtr *) value);
310 } else if (!strcmp(name, "is well formed")) {
311 ctxt->wellFormed = *((int *) value);
312 } else if (!strcmp(name, "is valid")) {
313 ctxt->valid = *((int *) value);
314 } else if (!strcmp(name, "SAX block")) {
315 ctxt->sax = *((xmlSAXHandlerPtr *) value);
316 } else if (!strcmp(name, "SAX function internalSubset")) {
317 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function isStandalone")) {
319 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
321 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
323 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function resolveEntity")) {
325 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function getEntity")) {
327 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function entityDecl")) {
329 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function notationDecl")) {
331 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function attributeDecl")) {
333 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function elementDecl")) {
335 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
337 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
339 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function startDocument")) {
341 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function endDocument")) {
343 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function startElement")) {
345 ctxt->sax->startElement = *((startElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function endElement")) {
347 ctxt->sax->endElement = *((endElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function reference")) {
349 ctxt->sax->reference = *((referenceSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function characters")) {
351 ctxt->sax->characters = *((charactersSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
353 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function processingInstruction")) {
355 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function comment")) {
357 ctxt->sax->comment = *((commentSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function warning")) {
359 ctxt->sax->warning = *((warningSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function error")) {
361 ctxt->sax->error = *((errorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function fatalError")) {
363 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function getParameterEntity")) {
365 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
366 } else if (!strcmp(name, "SAX function cdataBlock")) {
367 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
368 } else if (!strcmp(name, "SAX function externalSubset")) {
369 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
370 } else {
371 return(-1);
372 }
373 return(0);
374}
375
376/************************************************************************
377 * *
378 * Some functions to avoid too large macros *
379 * *
380 ************************************************************************/
381
382/**
383 * xmlIsChar:
384 * @c: an unicode character (int)
385 *
386 * Check whether the character is allowed by the production
387 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
388 * | [#x10000-#x10FFFF]
389 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
390 * Also available as a macro IS_CHAR()
391 *
392 * Returns 0 if not, non-zero otherwise
393 */
394int
395xmlIsChar(int c) {
396 return(
397 ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||
398 (((c) >= 0x20) && ((c) <= 0xD7FF)) ||
399 (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||
400 (((c) >= 0x10000) && ((c) <= 0x10FFFF)));
401}
402
403/**
404 * xmlIsBlank:
405 * @c: an unicode character (int)
406 *
407 * Check whether the character is allowed by the production
408 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
409 * Also available as a macro IS_BLANK()
410 *
411 * Returns 0 if not, non-zero otherwise
412 */
413int
414xmlIsBlank(int c) {
415 return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D));
416}
417
418/**
419 * xmlIsBaseChar:
420 * @c: an unicode character (int)
421 *
422 * Check whether the character is allowed by the production
423 * [85] BaseChar ::= ... long list see REC ...
424 *
425 * VI is your friend !
426 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
427 * and
428 * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
429 *
430 * Returns 0 if not, non-zero otherwise
431 */
432static int xmlBaseArray[] = {
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
437 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
438 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
439 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
440 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
445 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
446 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
447 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
448 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
449};
450
451int
452xmlIsBaseChar(int c) {
453 return(
454 (((c) < 0x0100) ? xmlBaseArray[c] :
455 ( /* accelerator */
456 (((c) >= 0x0100) && ((c) <= 0x0131)) ||
457 (((c) >= 0x0134) && ((c) <= 0x013E)) ||
458 (((c) >= 0x0141) && ((c) <= 0x0148)) ||
459 (((c) >= 0x014A) && ((c) <= 0x017E)) ||
460 (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
461 (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
462 (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
463 (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
464 (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
465 (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
466 ((c) == 0x0386) ||
467 (((c) >= 0x0388) && ((c) <= 0x038A)) ||
468 ((c) == 0x038C) ||
469 (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
470 (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
471 (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
472 ((c) == 0x03DA) ||
473 ((c) == 0x03DC) ||
474 ((c) == 0x03DE) ||
475 ((c) == 0x03E0) ||
476 (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
477 (((c) >= 0x0401) && ((c) <= 0x040C)) ||
478 (((c) >= 0x040E) && ((c) <= 0x044F)) ||
479 (((c) >= 0x0451) && ((c) <= 0x045C)) ||
480 (((c) >= 0x045E) && ((c) <= 0x0481)) ||
481 (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
482 (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
483 (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
484 (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
485 (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
486 (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
487 (((c) >= 0x0531) && ((c) <= 0x0556)) ||
488 ((c) == 0x0559) ||
489 (((c) >= 0x0561) && ((c) <= 0x0586)) ||
490 (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
491 (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
492 (((c) >= 0x0621) && ((c) <= 0x063A)) ||
493 (((c) >= 0x0641) && ((c) <= 0x064A)) ||
494 (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
495 (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
496 (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
497 (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
498 ((c) == 0x06D5) ||
499 (((c) >= 0x06E5) && ((c) <= 0x06E6)) ||
500 (((c) >= 0x905) && ( /* accelerator */
501 (((c) >= 0x0905) && ((c) <= 0x0939)) ||
502 ((c) == 0x093D) ||
503 (((c) >= 0x0958) && ((c) <= 0x0961)) ||
504 (((c) >= 0x0985) && ((c) <= 0x098C)) ||
505 (((c) >= 0x098F) && ((c) <= 0x0990)) ||
506 (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
507 (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
508 ((c) == 0x09B2) ||
509 (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
510 (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
511 (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
512 (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
513 (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
514 (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
515 (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
516 (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
517 (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
518 (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
519 (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
520 (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
521 ((c) == 0x0A5E) ||
522 (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
523 (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
524 ((c) == 0x0A8D) ||
525 (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
526 (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
527 (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
528 (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
529 (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
530 ((c) == 0x0ABD) ||
531 ((c) == 0x0AE0) ||
532 (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
533 (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
534 (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
535 (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
536 (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
537 (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
538 ((c) == 0x0B3D) ||
539 (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
540 (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
541 (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
542 (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
543 (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
544 (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
545 ((c) == 0x0B9C) ||
546 (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
547 (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
548 (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
549 (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
550 (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
551 (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
552 (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
553 (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
554 (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
555 (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
556 (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
557 (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
558 (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
559 (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
560 (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
561 (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
562 ((c) == 0x0CDE) ||
563 (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
564 (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
565 (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
566 (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
567 (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
568 (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
569 (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
570 ((c) == 0x0E30) ||
571 (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
572 (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
573 (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
574 ((c) == 0x0E84) ||
575 (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
576 ((c) == 0x0E8A) ||
577 ((c) == 0x0E8D) ||
578 (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
579 (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
580 (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
581 ((c) == 0x0EA5) ||
582 ((c) == 0x0EA7) ||
583 (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
584 (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
585 ((c) == 0x0EB0) ||
586 (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
587 ((c) == 0x0EBD) ||
588 (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
589 (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
590 (((c) >= 0x0F49) && ((c) <= 0x0F69)) ||
591 (((c) >= 0x10A0) && ( /* accelerator */
592 (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
593 (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
594 ((c) == 0x1100) ||
595 (((c) >= 0x1102) && ((c) <= 0x1103)) ||
596 (((c) >= 0x1105) && ((c) <= 0x1107)) ||
597 ((c) == 0x1109) ||
598 (((c) >= 0x110B) && ((c) <= 0x110C)) ||
599 (((c) >= 0x110E) && ((c) <= 0x1112)) ||
600 ((c) == 0x113C) ||
601 ((c) == 0x113E) ||
602 ((c) == 0x1140) ||
603 ((c) == 0x114C) ||
604 ((c) == 0x114E) ||
605 ((c) == 0x1150) ||
606 (((c) >= 0x1154) && ((c) <= 0x1155)) ||
607 ((c) == 0x1159) ||
608 (((c) >= 0x115F) && ((c) <= 0x1161)) ||
609 ((c) == 0x1163) ||
610 ((c) == 0x1165) ||
611 ((c) == 0x1167) ||
612 ((c) == 0x1169) ||
613 (((c) >= 0x116D) && ((c) <= 0x116E)) ||
614 (((c) >= 0x1172) && ((c) <= 0x1173)) ||
615 ((c) == 0x1175) ||
616 ((c) == 0x119E) ||
617 ((c) == 0x11A8) ||
618 ((c) == 0x11AB) ||
619 (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
620 (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
621 ((c) == 0x11BA) ||
622 (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
623 ((c) == 0x11EB) ||
624 ((c) == 0x11F0) ||
625 ((c) == 0x11F9) ||
626 (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
627 (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
628 (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
629 (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
630 (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
631 (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
632 (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
633 ((c) == 0x1F59) ||
634 ((c) == 0x1F5B) ||
635 ((c) == 0x1F5D) ||
636 (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
637 (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
638 (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
639 ((c) == 0x1FBE) ||
640 (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
641 (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
642 (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
643 (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
644 (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
645 (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
646 (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
647 ((c) == 0x2126) ||
648 (((c) >= 0x212A) && ((c) <= 0x212B)) ||
649 ((c) == 0x212E) ||
650 (((c) >= 0x2180) && ((c) <= 0x2182)) ||
651 (((c) >= 0x3041) && ((c) <= 0x3094)) ||
652 (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
653 (((c) >= 0x3105) && ((c) <= 0x312C)) ||
654 (((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */ ))))));
655}
656
657/**
658 * xmlIsDigit:
659 * @c: an unicode character (int)
660 *
661 * Check whether the character is allowed by the production
662 * [88] Digit ::= ... long list see REC ...
663 *
664 * Returns 0 if not, non-zero otherwise
665 */
666int
667xmlIsDigit(int c) {
668 return(
669 (((c) >= 0x0030) && ((c) <= 0x0039)) ||
670 (((c) >= 0x660) && ( /* accelerator */
671 (((c) >= 0x0660) && ((c) <= 0x0669)) ||
672 (((c) >= 0x06F0) && ((c) <= 0x06F9)) ||
673 (((c) >= 0x0966) && ((c) <= 0x096F)) ||
674 (((c) >= 0x09E6) && ((c) <= 0x09EF)) ||
675 (((c) >= 0x0A66) && ((c) <= 0x0A6F)) ||
676 (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) ||
677 (((c) >= 0x0B66) && ((c) <= 0x0B6F)) ||
678 (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) ||
679 (((c) >= 0x0C66) && ((c) <= 0x0C6F)) ||
680 (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) ||
681 (((c) >= 0x0D66) && ((c) <= 0x0D6F)) ||
682 (((c) >= 0x0E50) && ((c) <= 0x0E59)) ||
683 (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) ||
684 (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ ));
685}
686
687/**
688 * xmlIsCombining:
689 * @c: an unicode character (int)
690 *
691 * Check whether the character is allowed by the production
692 * [87] CombiningChar ::= ... long list see REC ...
693 *
694 * Returns 0 if not, non-zero otherwise
695 */
696int
697xmlIsCombining(int c) {
698 return(
699 (((c) >= 0x300) && ( /* accelerator */
700 (((c) >= 0x0300) && ((c) <= 0x0345)) ||
701 (((c) >= 0x0360) && ((c) <= 0x0361)) ||
702 (((c) >= 0x0483) && ((c) <= 0x0486)) ||
703 (((c) >= 0x0591) && ((c) <= 0x05A1)) ||
704 (((c) >= 0x05A3) && ((c) <= 0x05B9)) ||
705 (((c) >= 0x05BB) && ((c) <= 0x05BD)) ||
706 ((c) == 0x05BF) ||
707 (((c) >= 0x05C1) && ((c) <= 0x05C2)) ||
708 ((c) == 0x05C4) ||
709 (((c) >= 0x064B) && ((c) <= 0x0652)) ||
710 ((c) == 0x0670) ||
711 (((c) >= 0x06D6) && ((c) <= 0x06DC)) ||
712 (((c) >= 0x06DD) && ((c) <= 0x06DF)) ||
713 (((c) >= 0x06E0) && ((c) <= 0x06E4)) ||
714 (((c) >= 0x06E7) && ((c) <= 0x06E8)) ||
715 (((c) >= 0x06EA) && ((c) <= 0x06ED)) ||
716 (((c) >= 0x0901) && ( /* accelerator */
717 (((c) >= 0x0901) && ((c) <= 0x0903)) ||
718 ((c) == 0x093C) ||
719 (((c) >= 0x093E) && ((c) <= 0x094C)) ||
720 ((c) == 0x094D) ||
721 (((c) >= 0x0951) && ((c) <= 0x0954)) ||
722 (((c) >= 0x0962) && ((c) <= 0x0963)) ||
723 (((c) >= 0x0981) && ((c) <= 0x0983)) ||
724 ((c) == 0x09BC) ||
725 ((c) == 0x09BE) ||
726 ((c) == 0x09BF) ||
727 (((c) >= 0x09C0) && ((c) <= 0x09C4)) ||
728 (((c) >= 0x09C7) && ((c) <= 0x09C8)) ||
729 (((c) >= 0x09CB) && ((c) <= 0x09CD)) ||
730 ((c) == 0x09D7) ||
731 (((c) >= 0x09E2) && ((c) <= 0x09E3)) ||
732 (((c) >= 0x0A02) && ( /* accelerator */
733 ((c) == 0x0A02) ||
734 ((c) == 0x0A3C) ||
735 ((c) == 0x0A3E) ||
736 ((c) == 0x0A3F) ||
737 (((c) >= 0x0A40) && ((c) <= 0x0A42)) ||
738 (((c) >= 0x0A47) && ((c) <= 0x0A48)) ||
739 (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) ||
740 (((c) >= 0x0A70) && ((c) <= 0x0A71)) ||
741 (((c) >= 0x0A81) && ((c) <= 0x0A83)) ||
742 ((c) == 0x0ABC) ||
743 (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) ||
744 (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) ||
745 (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) ||
746 (((c) >= 0x0B01) && ((c) <= 0x0B03)) ||
747 ((c) == 0x0B3C) ||
748 (((c) >= 0x0B3E) && ((c) <= 0x0B43)) ||
749 (((c) >= 0x0B47) && ((c) <= 0x0B48)) ||
750 (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) ||
751 (((c) >= 0x0B56) && ((c) <= 0x0B57)) ||
752 (((c) >= 0x0B82) && ((c) <= 0x0B83)) ||
753 (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) ||
754 (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) ||
755 (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) ||
756 ((c) == 0x0BD7) ||
757 (((c) >= 0x0C01) && ((c) <= 0x0C03)) ||
758 (((c) >= 0x0C3E) && ((c) <= 0x0C44)) ||
759 (((c) >= 0x0C46) && ((c) <= 0x0C48)) ||
760 (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) ||
761 (((c) >= 0x0C55) && ((c) <= 0x0C56)) ||
762 (((c) >= 0x0C82) && ((c) <= 0x0C83)) ||
763 (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) ||
764 (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) ||
765 (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) ||
766 (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) ||
767 (((c) >= 0x0D02) && ((c) <= 0x0D03)) ||
768 (((c) >= 0x0D3E) && ((c) <= 0x0D43)) ||
769 (((c) >= 0x0D46) && ((c) <= 0x0D48)) ||
770 (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) ||
771 ((c) == 0x0D57) ||
772 (((c) >= 0x0E31) && ( /* accelerator */
773 ((c) == 0x0E31) ||
774 (((c) >= 0x0E34) && ((c) <= 0x0E3A)) ||
775 (((c) >= 0x0E47) && ((c) <= 0x0E4E)) ||
776 ((c) == 0x0EB1) ||
777 (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) ||
778 (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) ||
779 (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) ||
780 (((c) >= 0x0F18) && ((c) <= 0x0F19)) ||
781 ((c) == 0x0F35) ||
782 ((c) == 0x0F37) ||
783 ((c) == 0x0F39) ||
784 ((c) == 0x0F3E) ||
785 ((c) == 0x0F3F) ||
786 (((c) >= 0x0F71) && ((c) <= 0x0F84)) ||
787 (((c) >= 0x0F86) && ((c) <= 0x0F8B)) ||
788 (((c) >= 0x0F90) && ((c) <= 0x0F95)) ||
789 ((c) == 0x0F97) ||
790 (((c) >= 0x0F99) && ((c) <= 0x0FAD)) ||
791 (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) ||
792 ((c) == 0x0FB9) ||
793 (((c) >= 0x20D0) && ((c) <= 0x20DC)) ||
794 ((c) == 0x20E1) ||
795 (((c) >= 0x302A) && ((c) <= 0x302F)) ||
796 ((c) == 0x3099) ||
797 ((c) == 0x309A))))))))));
798}
799
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Every code point of the three ranges is listed explicitly, including
 * #x30FD which sits in the middle of [#x30FC-#x30FE].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        /* isolated code points */
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034:
        case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE] */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
824
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE: in addition to the ranges quoted above, this implementation
 * also accepts the range [#xF900-#xFA2D].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x0100 can match, get rid of it first */
    if (c < 0x0100)
        return(0);

    if (c == 0x3007)
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    if ((c >= 0x4e00) && (c <= 0x9fa5))
        return(1);
    if ((c >= 0xf900) && (c <= 0xfa2d))
        return(1);

    return(0);
}
842
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The two alternatives are tested in that order through the
 * IS_BASECHAR() and IS_IDEOGRAPHIC() macros.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
856
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* #x20 | #xD | #xA */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
879
880/************************************************************************
881 * *
882 * Input handling functions for progressive parsing *
883 * *
884 ************************************************************************/
885
886/* #define DEBUG_INPUT */
887/* #define DEBUG_STACK */
888/* #define DEBUG_PUSH */
889
890
891/* we need to keep enough input to show errors in context */
892#define LINE_LEN 80
893
894#ifdef DEBUG_INPUT
895#define CHECK_BUFFER(in) check_buffer(in)
896
897void check_buffer(xmlParserInputPtr in) {
898 if (in->base != in->buf->buffer->content) {
899 xmlGenericError(xmlGenericErrorContext,
900 "xmlParserInput: base mismatch problem\n");
901 }
902 if (in->cur < in->base) {
903 xmlGenericError(xmlGenericErrorContext,
904 "xmlParserInput: cur < base problem\n");
905 }
906 if (in->cur > in->base + in->buf->buffer->use) {
907 xmlGenericError(xmlGenericErrorContext,
908 "xmlParserInput: cur > base + use problem\n");
909 }
910 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
911 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
912 in->buf->buffer->use, in->buf->buffer->size);
913}
914
915#else
916#define CHECK_BUFFER(in)
917#endif
918
919
920/**
921 * xmlParserInputRead:
922 * @in: an XML parser input
923 * @len: an indicative size for the lookahead
924 *
925 * This function refresh the input for the parser. It doesn't try to
926 * preserve pointers to the input buffer, and discard already read data
927 *
928 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
929 * end of this entity
930 */
931int
932xmlParserInputRead(xmlParserInputPtr in, int len) {
933 int ret;
934 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000935 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000936
937#ifdef DEBUG_INPUT
938 xmlGenericError(xmlGenericErrorContext, "Read\n");
939#endif
940 if (in->buf == NULL) return(-1);
941 if (in->base == NULL) return(-1);
942 if (in->cur == NULL) return(-1);
943 if (in->buf->buffer == NULL) return(-1);
944 if (in->buf->readcallback == NULL) return(-1);
945
946 CHECK_BUFFER(in);
947
948 used = in->cur - in->buf->buffer->content;
949 ret = xmlBufferShrink(in->buf->buffer, used);
950 if (ret > 0) {
951 in->cur -= ret;
952 in->consumed += ret;
953 }
954 ret = xmlParserInputBufferRead(in->buf, len);
955 if (in->base != in->buf->buffer->content) {
956 /*
957 * the buffer has been realloced
958 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000959 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000960 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000961 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000962 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000963 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000964
965 CHECK_BUFFER(in);
966
967 return(ret);
968}
969
970/**
971 * xmlParserInputGrow:
972 * @in: an XML parser input
973 * @len: an indicative size for the lookahead
974 *
975 * This function increase the input for the parser. It tries to
976 * preserve pointers to the input buffer, and keep already read data
977 *
978 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
979 * end of this entity
980 */
981int
982xmlParserInputGrow(xmlParserInputPtr in, int len) {
983 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000984 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000985
986#ifdef DEBUG_INPUT
987 xmlGenericError(xmlGenericErrorContext, "Grow\n");
988#endif
989 if (in->buf == NULL) return(-1);
990 if (in->base == NULL) return(-1);
991 if (in->cur == NULL) return(-1);
992 if (in->buf->buffer == NULL) return(-1);
993
994 CHECK_BUFFER(in);
995
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000996 indx = in->cur - in->base;
997 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000998
999 CHECK_BUFFER(in);
1000
1001 return(0);
1002 }
1003 if (in->buf->readcallback != NULL)
1004 ret = xmlParserInputBufferGrow(in->buf, len);
1005 else
1006 return(0);
1007
1008 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001009 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001010 * block, but we use it really as an integer to do some
1011 * pointer arithmetic. Insure will raise it as a bug but in
1012 * that specific case, that's not !
1013 */
1014 if (in->base != in->buf->buffer->content) {
1015 /*
1016 * the buffer has been realloced
1017 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001018 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001019 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001020 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001021 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001022 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001023
1024 CHECK_BUFFER(in);
1025
1026 return(ret);
1027}
1028
1029/**
1030 * xmlParserInputShrink:
1031 * @in: an XML parser input
1032 *
1033 * This function removes used input for the parser.
1034 */
1035void
1036xmlParserInputShrink(xmlParserInputPtr in) {
1037 int used;
1038 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001039 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001040
1041#ifdef DEBUG_INPUT
1042 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1043#endif
1044 if (in->buf == NULL) return;
1045 if (in->base == NULL) return;
1046 if (in->cur == NULL) return;
1047 if (in->buf->buffer == NULL) return;
1048
1049 CHECK_BUFFER(in);
1050
1051 used = in->cur - in->buf->buffer->content;
1052 /*
1053 * Do not shrink on large buffers whose only a tiny fraction
1054 * was consumned
1055 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001056 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001057 return;
1058 if (used > INPUT_CHUNK) {
1059 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1060 if (ret > 0) {
1061 in->cur -= ret;
1062 in->consumed += ret;
1063 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001064 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001065 }
1066
1067 CHECK_BUFFER(in);
1068
1069 if (in->buf->buffer->use > INPUT_CHUNK) {
1070 return;
1071 }
1072 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1073 if (in->base != in->buf->buffer->content) {
1074 /*
1075 * the buffer has been realloced
1076 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001077 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001078 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001080 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001081 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001082
1083 CHECK_BUFFER(in);
1084}
1085
1086/************************************************************************
1087 * *
1088 * UTF8 character input and related functions *
1089 * *
1090 ************************************************************************/
1091
1092/**
1093 * xmlNextChar:
1094 * @ctxt: the XML parser context
1095 *
1096 * Skip to the next char input char.
1097 */
1098
1099void
1100xmlNextChar(xmlParserCtxtPtr ctxt) {
1101 if (ctxt->instate == XML_PARSER_EOF)
1102 return;
1103
1104 /*
1105 * 2.11 End-of-Line Handling
1106 * the literal two-character sequence "#xD#xA" or a standalone
1107 * literal #xD, an XML processor must pass to the application
1108 * the single character #xA.
1109 */
1110 if (ctxt->token != 0) ctxt->token = 0;
1111 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1112 if ((*ctxt->input->cur == 0) &&
1113 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1114 (ctxt->instate != XML_PARSER_COMMENT)) {
1115 /*
1116 * If we are at the end of the current entity and
1117 * the context allows it, we pop consumed entities
1118 * automatically.
1119 * the auto closing should be blocked in other cases
1120 */
1121 xmlPopInput(ctxt);
1122 } else {
1123 if (*(ctxt->input->cur) == '\n') {
1124 ctxt->input->line++; ctxt->input->col = 1;
1125 } else ctxt->input->col++;
1126 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1127 /*
1128 * We are supposed to handle UTF8, check it's valid
1129 * From rfc2044: encoding of the Unicode values on UTF-8:
1130 *
1131 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1132 * 0000 0000-0000 007F 0xxxxxxx
1133 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1134 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1135 *
1136 * Check for the 0x110000 limit too
1137 */
1138 const unsigned char *cur = ctxt->input->cur;
1139 unsigned char c;
1140
1141 c = *cur;
1142 if (c & 0x80) {
1143 if (cur[1] == 0)
1144 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1145 if ((cur[1] & 0xc0) != 0x80)
1146 goto encoding_error;
1147 if ((c & 0xe0) == 0xe0) {
1148 unsigned int val;
1149
1150 if (cur[2] == 0)
1151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1152 if ((cur[2] & 0xc0) != 0x80)
1153 goto encoding_error;
1154 if ((c & 0xf0) == 0xf0) {
1155 if (cur[3] == 0)
1156 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1157 if (((c & 0xf8) != 0xf0) ||
1158 ((cur[3] & 0xc0) != 0x80))
1159 goto encoding_error;
1160 /* 4-byte code */
1161 ctxt->input->cur += 4;
1162 val = (cur[0] & 0x7) << 18;
1163 val |= (cur[1] & 0x3f) << 12;
1164 val |= (cur[2] & 0x3f) << 6;
1165 val |= cur[3] & 0x3f;
1166 } else {
1167 /* 3-byte code */
1168 ctxt->input->cur += 3;
1169 val = (cur[0] & 0xf) << 12;
1170 val |= (cur[1] & 0x3f) << 6;
1171 val |= cur[2] & 0x3f;
1172 }
1173 if (((val > 0xd7ff) && (val < 0xe000)) ||
1174 ((val > 0xfffd) && (val < 0x10000)) ||
1175 (val >= 0x110000)) {
1176 if ((ctxt->sax != NULL) &&
1177 (ctxt->sax->error != NULL))
1178 ctxt->sax->error(ctxt->userData,
1179 "Char 0x%X out of allowed range\n", val);
1180 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1181 ctxt->wellFormed = 0;
1182 ctxt->disableSAX = 1;
1183 }
1184 } else
1185 /* 2-byte code */
1186 ctxt->input->cur += 2;
1187 } else
1188 /* 1-byte code */
1189 ctxt->input->cur++;
1190 } else {
1191 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001192 * Assume it's a fixed length encoding (1) with
Owen Taylor3473f882001-02-23 17:55:21 +00001193 * a compatibke encoding for the ASCII set, since
1194 * XML constructs only use < 128 chars
1195 */
1196 ctxt->input->cur++;
1197 }
1198 ctxt->nbChars++;
1199 if (*ctxt->input->cur == 0)
1200 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1201 }
1202 } else {
1203 ctxt->input->cur++;
1204 ctxt->nbChars++;
1205 if (*ctxt->input->cur == 0)
1206 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1207 }
1208 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1209 xmlParserHandlePEReference(ctxt);
1210 if ((*ctxt->input->cur == 0) &&
1211 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1212 xmlPopInput(ctxt);
1213 return;
1214encoding_error:
1215 /*
1216 * If we detect an UTF8 error that probably mean that the
1217 * input encoding didn't get properly advertized in the
1218 * declaration header. Report the error and switch the encoding
1219 * to ISO-Latin-1 (if you don't like this policy, just declare the
1220 * encoding !)
1221 */
1222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1223 ctxt->sax->error(ctxt->userData,
1224 "Input is not proper UTF-8, indicate encoding !\n");
1225 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1226 ctxt->input->cur[0], ctxt->input->cur[1],
1227 ctxt->input->cur[2], ctxt->input->cur[3]);
1228 }
1229 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1230
1231 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1232 ctxt->input->cur++;
1233 return;
1234}
1235
1236/**
1237 * xmlCurrentChar:
1238 * @ctxt: the XML parser context
1239 * @len: pointer to the length of the char read
1240 *
1241 * The current char value, if using UTF-8 this may actaully span multiple
1242 * bytes in the input buffer. Implement the end of line normalization:
1243 * 2.11 End-of-Line Handling
1244 * Wherever an external parsed entity or the literal entity value
1245 * of an internal parsed entity contains either the literal two-character
1246 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1247 * must pass to the application the single character #xA.
1248 * This behavior can conveniently be produced by normalizing all
1249 * line breaks to #xA on input, before parsing.)
1250 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001251 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001252 */
1253
1254int
1255xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1256 if (ctxt->instate == XML_PARSER_EOF)
1257 return(0);
1258
1259 if (ctxt->token != 0) {
1260 *len = 0;
1261 return(ctxt->token);
1262 }
1263 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1264 *len = 1;
1265 return((int) *ctxt->input->cur);
1266 }
1267 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1268 /*
1269 * We are supposed to handle UTF8, check it's valid
1270 * From rfc2044: encoding of the Unicode values on UTF-8:
1271 *
1272 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1273 * 0000 0000-0000 007F 0xxxxxxx
1274 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1275 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1276 *
1277 * Check for the 0x110000 limit too
1278 */
1279 const unsigned char *cur = ctxt->input->cur;
1280 unsigned char c;
1281 unsigned int val;
1282
1283 c = *cur;
1284 if (c & 0x80) {
1285 if (cur[1] == 0)
1286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1287 if ((cur[1] & 0xc0) != 0x80)
1288 goto encoding_error;
1289 if ((c & 0xe0) == 0xe0) {
1290
1291 if (cur[2] == 0)
1292 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1293 if ((cur[2] & 0xc0) != 0x80)
1294 goto encoding_error;
1295 if ((c & 0xf0) == 0xf0) {
1296 if (cur[3] == 0)
1297 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1298 if (((c & 0xf8) != 0xf0) ||
1299 ((cur[3] & 0xc0) != 0x80))
1300 goto encoding_error;
1301 /* 4-byte code */
1302 *len = 4;
1303 val = (cur[0] & 0x7) << 18;
1304 val |= (cur[1] & 0x3f) << 12;
1305 val |= (cur[2] & 0x3f) << 6;
1306 val |= cur[3] & 0x3f;
1307 } else {
1308 /* 3-byte code */
1309 *len = 3;
1310 val = (cur[0] & 0xf) << 12;
1311 val |= (cur[1] & 0x3f) << 6;
1312 val |= cur[2] & 0x3f;
1313 }
1314 } else {
1315 /* 2-byte code */
1316 *len = 2;
1317 val = (cur[0] & 0x1f) << 6;
1318 val |= cur[1] & 0x3f;
1319 }
1320 if (!IS_CHAR(val)) {
1321 if ((ctxt->sax != NULL) &&
1322 (ctxt->sax->error != NULL))
1323 ctxt->sax->error(ctxt->userData,
1324 "Char 0x%X out of allowed range\n", val);
1325 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1326 ctxt->wellFormed = 0;
1327 ctxt->disableSAX = 1;
1328 }
1329 return(val);
1330 } else {
1331 /* 1-byte code */
1332 *len = 1;
1333 if (*ctxt->input->cur == 0xD) {
1334 if (ctxt->input->cur[1] == 0xA) {
1335 ctxt->nbChars++;
1336 ctxt->input->cur++;
1337 }
1338 return(0xA);
1339 }
1340 return((int) *ctxt->input->cur);
1341 }
1342 }
1343 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001344 * Assume it's a fixed length encoding (1) with
Owen Taylor3473f882001-02-23 17:55:21 +00001345 * a compatibke encoding for the ASCII set, since
1346 * XML constructs only use < 128 chars
1347 */
1348 *len = 1;
1349 if (*ctxt->input->cur == 0xD) {
1350 if (ctxt->input->cur[1] == 0xA) {
1351 ctxt->nbChars++;
1352 ctxt->input->cur++;
1353 }
1354 return(0xA);
1355 }
1356 return((int) *ctxt->input->cur);
1357encoding_error:
1358 /*
1359 * If we detect an UTF8 error that probably mean that the
1360 * input encoding didn't get properly advertized in the
1361 * declaration header. Report the error and switch the encoding
1362 * to ISO-Latin-1 (if you don't like this policy, just declare the
1363 * encoding !)
1364 */
1365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1366 ctxt->sax->error(ctxt->userData,
1367 "Input is not proper UTF-8, indicate encoding !\n");
1368 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1369 ctxt->input->cur[0], ctxt->input->cur[1],
1370 ctxt->input->cur[2], ctxt->input->cur[3]);
1371 }
1372 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1373
1374 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1375 *len = 1;
1376 return((int) *ctxt->input->cur);
1377}
1378
1379/**
1380 * xmlStringCurrentChar:
1381 * @ctxt: the XML parser context
1382 * @cur: pointer to the beginning of the char
1383 * @len: pointer to the length of the char read
1384 *
1385 * The current char value, if using UTF-8 this may actaully span multiple
1386 * bytes in the input buffer.
1387 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001388 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001389 */
1390
1391int
1392xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001393 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001394 /*
1395 * We are supposed to handle UTF8, check it's valid
1396 * From rfc2044: encoding of the Unicode values on UTF-8:
1397 *
1398 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1399 * 0000 0000-0000 007F 0xxxxxxx
1400 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1401 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1402 *
1403 * Check for the 0x110000 limit too
1404 */
1405 unsigned char c;
1406 unsigned int val;
1407
1408 c = *cur;
1409 if (c & 0x80) {
1410 if ((cur[1] & 0xc0) != 0x80)
1411 goto encoding_error;
1412 if ((c & 0xe0) == 0xe0) {
1413
1414 if ((cur[2] & 0xc0) != 0x80)
1415 goto encoding_error;
1416 if ((c & 0xf0) == 0xf0) {
1417 if (((c & 0xf8) != 0xf0) ||
1418 ((cur[3] & 0xc0) != 0x80))
1419 goto encoding_error;
1420 /* 4-byte code */
1421 *len = 4;
1422 val = (cur[0] & 0x7) << 18;
1423 val |= (cur[1] & 0x3f) << 12;
1424 val |= (cur[2] & 0x3f) << 6;
1425 val |= cur[3] & 0x3f;
1426 } else {
1427 /* 3-byte code */
1428 *len = 3;
1429 val = (cur[0] & 0xf) << 12;
1430 val |= (cur[1] & 0x3f) << 6;
1431 val |= cur[2] & 0x3f;
1432 }
1433 } else {
1434 /* 2-byte code */
1435 *len = 2;
1436 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001437 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001438 }
1439 if (!IS_CHAR(val)) {
1440 if ((ctxt->sax != NULL) &&
1441 (ctxt->sax->error != NULL))
1442 ctxt->sax->error(ctxt->userData,
1443 "Char 0x%X out of allowed range\n", val);
1444 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1445 ctxt->wellFormed = 0;
1446 ctxt->disableSAX = 1;
1447 }
1448 return(val);
1449 } else {
1450 /* 1-byte code */
1451 *len = 1;
1452 return((int) *cur);
1453 }
1454 }
1455 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001456 * Assume it's a fixed length encoding (1) with
Owen Taylor3473f882001-02-23 17:55:21 +00001457 * a compatibke encoding for the ASCII set, since
1458 * XML constructs only use < 128 chars
1459 */
1460 *len = 1;
1461 return((int) *cur);
1462encoding_error:
1463 /*
1464 * If we detect an UTF8 error that probably mean that the
1465 * input encoding didn't get properly advertized in the
1466 * declaration header. Report the error and switch the encoding
1467 * to ISO-Latin-1 (if you don't like this policy, just declare the
1468 * encoding !)
1469 */
1470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1471 ctxt->sax->error(ctxt->userData,
1472 "Input is not proper UTF-8, indicate encoding !\n");
1473 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1474 ctxt->input->cur[0], ctxt->input->cur[1],
1475 ctxt->input->cur[2], ctxt->input->cur[3]);
1476 }
1477 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1478
1479 *len = 1;
1480 return((int) *cur);
1481}
1482
1483/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001484 * xmlCopyCharMultiByte:
1485 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001486 * @val: the char value
1487 *
1488 * append the char value in the array
1489 *
1490 * Returns the number of xmlChar written
1491 */
Owen Taylor3473f882001-02-23 17:55:21 +00001492int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001493xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001494 /*
1495 * We are supposed to handle UTF8, check it's valid
1496 * From rfc2044: encoding of the Unicode values on UTF-8:
1497 *
1498 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1499 * 0000 0000-0000 007F 0xxxxxxx
1500 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1501 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1502 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001503 if (val >= 0x80) {
1504 xmlChar *savedout = out;
1505 int bits;
1506 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1507 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1508 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1509 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001510 xmlGenericError(xmlGenericErrorContext,
1511 "Internal error, xmlCopyChar 0x%X out of bound\n",
1512 val);
1513 return(0);
1514 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001515 for ( ; bits >= 0; bits-= 6)
1516 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1517 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 }
1519 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001520 return 1;
1521}
1522
1523/**
1524 * xmlCopyChar:
1525 * @len: Ignored, compatibility
1526 * @out: pointer to an arry of xmlChar
1527 * @val: the char value
1528 *
1529 * append the char value in the array
1530 *
1531 * Returns the number of xmlChar written
1532 */
1533
1534int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001535xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001536 /* the len parameter is ignored */
1537 if (val >= 0x80) {
1538 return(xmlCopyCharMultiByte (out, val));
1539 }
1540 *out = (xmlChar) val;
1541 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001542}
1543
1544/************************************************************************
1545 * *
1546 * Commodity functions to switch encodings *
1547 * *
1548 ************************************************************************/
1549
1550/**
1551 * xmlSwitchEncoding:
1552 * @ctxt: the parser context
1553 * @enc: the encoding value (number)
1554 *
1555 * change the input functions when discovering the character encoding
1556 * of a given entity.
1557 *
1558 * Returns 0 in case of success, -1 otherwise
1559 */
1560int
1561xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1562{
1563 xmlCharEncodingHandlerPtr handler;
1564
1565 switch (enc) {
1566 case XML_CHAR_ENCODING_ERROR:
1567 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1569 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1570 ctxt->wellFormed = 0;
1571 ctxt->disableSAX = 1;
1572 break;
1573 case XML_CHAR_ENCODING_NONE:
1574 /* let's assume it's UTF-8 without the XML decl */
1575 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1576 return(0);
1577 case XML_CHAR_ENCODING_UTF8:
1578 /* default encoding, no conversion should be needed */
1579 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001580
1581 /*
1582 * Errata on XML-1.0 June 20 2001
1583 * Specific handling of the Byte Order Mark for
1584 * UTF-8
1585 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001586 if ((ctxt->input != NULL) &&
1587 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001588 (ctxt->input->cur[1] == 0xBB) &&
1589 (ctxt->input->cur[2] == 0xBF)) {
1590 ctxt->input->cur += 3;
1591 }
Owen Taylor3473f882001-02-23 17:55:21 +00001592 return(0);
1593 default:
1594 break;
1595 }
1596 handler = xmlGetCharEncodingHandler(enc);
1597 if (handler == NULL) {
1598 /*
1599 * Default handlers.
1600 */
1601 switch (enc) {
1602 case XML_CHAR_ENCODING_ERROR:
1603 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1606 ctxt->wellFormed = 0;
1607 ctxt->disableSAX = 1;
1608 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1609 break;
1610 case XML_CHAR_ENCODING_NONE:
1611 /* let's assume it's UTF-8 without the XML decl */
1612 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1613 return(0);
1614 case XML_CHAR_ENCODING_UTF8:
1615 case XML_CHAR_ENCODING_ASCII:
1616 /* default encoding, no conversion should be needed */
1617 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1618 return(0);
1619 case XML_CHAR_ENCODING_UTF16LE:
1620 break;
1621 case XML_CHAR_ENCODING_UTF16BE:
1622 break;
1623 case XML_CHAR_ENCODING_UCS4LE:
1624 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626 ctxt->sax->error(ctxt->userData,
1627 "char encoding USC4 little endian not supported\n");
1628 break;
1629 case XML_CHAR_ENCODING_UCS4BE:
1630 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1632 ctxt->sax->error(ctxt->userData,
1633 "char encoding USC4 big endian not supported\n");
1634 break;
1635 case XML_CHAR_ENCODING_EBCDIC:
1636 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638 ctxt->sax->error(ctxt->userData,
1639 "char encoding EBCDIC not supported\n");
1640 break;
1641 case XML_CHAR_ENCODING_UCS4_2143:
1642 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "char encoding UCS4 2143 not supported\n");
1646 break;
1647 case XML_CHAR_ENCODING_UCS4_3412:
1648 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1650 ctxt->sax->error(ctxt->userData,
1651 "char encoding UCS4 3412 not supported\n");
1652 break;
1653 case XML_CHAR_ENCODING_UCS2:
1654 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "char encoding UCS2 not supported\n");
1658 break;
1659 case XML_CHAR_ENCODING_8859_1:
1660 case XML_CHAR_ENCODING_8859_2:
1661 case XML_CHAR_ENCODING_8859_3:
1662 case XML_CHAR_ENCODING_8859_4:
1663 case XML_CHAR_ENCODING_8859_5:
1664 case XML_CHAR_ENCODING_8859_6:
1665 case XML_CHAR_ENCODING_8859_7:
1666 case XML_CHAR_ENCODING_8859_8:
1667 case XML_CHAR_ENCODING_8859_9:
1668 /*
1669 * We used to keep the internal content in the
1670 * document encoding however this turns being unmaintainable
1671 * So xmlGetCharEncodingHandler() will return non-null
1672 * values for this now.
1673 */
1674 if ((ctxt->inputNr == 1) &&
1675 (ctxt->encoding == NULL) &&
1676 (ctxt->input->encoding != NULL)) {
1677 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1678 }
1679 ctxt->charset = enc;
1680 return(0);
1681 case XML_CHAR_ENCODING_2022_JP:
1682 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1684 ctxt->sax->error(ctxt->userData,
1685 "char encoding ISO-2022-JPnot supported\n");
1686 break;
1687 case XML_CHAR_ENCODING_SHIFT_JIS:
1688 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
1691 "char encoding Shift_JIS not supported\n");
1692 break;
1693 case XML_CHAR_ENCODING_EUC_JP:
1694 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696 ctxt->sax->error(ctxt->userData,
1697 "char encoding EUC-JPnot supported\n");
1698 break;
1699 }
1700 }
1701 if (handler == NULL)
1702 return(-1);
1703 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1704 return(xmlSwitchToEncoding(ctxt, handler));
1705}
1706
1707/**
1708 * xmlSwitchToEncoding:
1709 * @ctxt: the parser context
1710 * @handler: the encoding handler
1711 *
1712 * change the input functions when discovering the character encoding
1713 * of a given entity.
1714 *
1715 * Returns 0 in case of success, -1 otherwise
1716 */
1717int
1718xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1719{
1720 int nbchars;
1721
1722 if (handler != NULL) {
1723 if (ctxt->input != NULL) {
1724 if (ctxt->input->buf != NULL) {
1725 if (ctxt->input->buf->encoder != NULL) {
1726 if (ctxt->input->buf->encoder == handler)
1727 return(0);
1728 /*
1729 * Note: this is a bit dangerous, but that's what it
1730 * takes to use nearly compatible signature for different
1731 * encodings.
1732 */
1733 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1734 ctxt->input->buf->encoder = handler;
1735 return(0);
1736 }
1737 ctxt->input->buf->encoder = handler;
1738
1739 /*
1740 * Is there already some content down the pipe to convert ?
1741 */
1742 if ((ctxt->input->buf->buffer != NULL) &&
1743 (ctxt->input->buf->buffer->use > 0)) {
1744 int processed;
1745
1746 /*
1747 * Specific handling of the Byte Order Mark for
1748 * UTF-16
1749 */
1750 if ((handler->name != NULL) &&
1751 (!strcmp(handler->name, "UTF-16LE")) &&
1752 (ctxt->input->cur[0] == 0xFF) &&
1753 (ctxt->input->cur[1] == 0xFE)) {
1754 ctxt->input->cur += 2;
1755 }
1756 if ((handler->name != NULL) &&
1757 (!strcmp(handler->name, "UTF-16BE")) &&
1758 (ctxt->input->cur[0] == 0xFE) &&
1759 (ctxt->input->cur[1] == 0xFF)) {
1760 ctxt->input->cur += 2;
1761 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001762 /*
1763 * Errata on XML-1.0 June 20 2001
1764 * Specific handling of the Byte Order Mark for
1765 * UTF-8
1766 */
1767 if ((handler->name != NULL) &&
1768 (!strcmp(handler->name, "UTF-8")) &&
1769 (ctxt->input->cur[0] == 0xEF) &&
1770 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001771 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001772 ctxt->input->cur += 3;
1773 }
Owen Taylor3473f882001-02-23 17:55:21 +00001774
1775 /*
1776 * Shring the current input buffer.
1777 * Move it as the raw buffer and create a new input buffer
1778 */
1779 processed = ctxt->input->cur - ctxt->input->base;
1780 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1781 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1782 ctxt->input->buf->buffer = xmlBufferCreate();
1783
1784 if (ctxt->html) {
1785 /*
1786 * converst as much as possbile of the buffer
1787 */
1788 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1789 ctxt->input->buf->buffer,
1790 ctxt->input->buf->raw);
1791 } else {
1792 /*
1793 * convert just enough to get
1794 * '<?xml version="1.0" encoding="xxx"?>'
1795 * parsed with the autodetected encoding
1796 * into the parser reading buffer.
1797 */
1798 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1799 ctxt->input->buf->buffer,
1800 ctxt->input->buf->raw);
1801 }
1802 if (nbchars < 0) {
1803 xmlGenericError(xmlGenericErrorContext,
1804 "xmlSwitchToEncoding: encoder error\n");
1805 return(-1);
1806 }
1807 ctxt->input->base =
1808 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001809 ctxt->input->end =
1810 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001811
1812 }
1813 return(0);
1814 } else {
1815 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1816 /*
1817 * When parsing a static memory array one must know the
1818 * size to be able to convert the buffer.
1819 */
1820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1821 ctxt->sax->error(ctxt->userData,
1822 "xmlSwitchEncoding : no input\n");
1823 return(-1);
1824 } else {
1825 int processed;
1826
1827 /*
1828 * Shring the current input buffer.
1829 * Move it as the raw buffer and create a new input buffer
1830 */
1831 processed = ctxt->input->cur - ctxt->input->base;
1832
1833 ctxt->input->buf->raw = xmlBufferCreate();
1834 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1835 ctxt->input->length - processed);
1836 ctxt->input->buf->buffer = xmlBufferCreate();
1837
1838 /*
1839 * convert as much as possible of the raw input
1840 * to the parser reading buffer.
1841 */
1842 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1843 ctxt->input->buf->buffer,
1844 ctxt->input->buf->raw);
1845 if (nbchars < 0) {
1846 xmlGenericError(xmlGenericErrorContext,
1847 "xmlSwitchToEncoding: encoder error\n");
1848 return(-1);
1849 }
1850
1851 /*
1852 * Conversion succeeded, get rid of the old buffer
1853 */
1854 if ((ctxt->input->free != NULL) &&
1855 (ctxt->input->base != NULL))
1856 ctxt->input->free((xmlChar *) ctxt->input->base);
1857 ctxt->input->base =
1858 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001859 ctxt->input->end =
1860 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001861 }
1862 }
1863 } else {
1864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1865 ctxt->sax->error(ctxt->userData,
1866 "xmlSwitchEncoding : no input\n");
1867 return(-1);
1868 }
1869 /*
1870 * The parsing is now done in UTF8 natively
1871 */
1872 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1873 } else
1874 return(-1);
1875 return(0);
1876
1877}
1878
1879/************************************************************************
1880 * *
1881 * Commodity functions to handle entities processing *
1882 * *
1883 ************************************************************************/
1884
1885/**
1886 * xmlFreeInputStream:
1887 * @input: an xmlParserInputPtr
1888 *
1889 * Free up an input stream.
1890 */
1891void
1892xmlFreeInputStream(xmlParserInputPtr input) {
1893 if (input == NULL) return;
1894
1895 if (input->filename != NULL) xmlFree((char *) input->filename);
1896 if (input->directory != NULL) xmlFree((char *) input->directory);
1897 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1898 if (input->version != NULL) xmlFree((char *) input->version);
1899 if ((input->free != NULL) && (input->base != NULL))
1900 input->free((xmlChar *) input->base);
1901 if (input->buf != NULL)
1902 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 xmlFree(input);
1904}
1905
1906/**
1907 * xmlNewInputStream:
1908 * @ctxt: an XML parser context
1909 *
1910 * Create a new input stream structure
1911 * Returns the new input stream or NULL
1912 */
1913xmlParserInputPtr
1914xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1915 xmlParserInputPtr input;
1916
1917 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1918 if (input == NULL) {
1919 if (ctxt != NULL) {
1920 ctxt->errNo = XML_ERR_NO_MEMORY;
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData,
1923 "malloc: couldn't allocate a new input stream\n");
1924 ctxt->errNo = XML_ERR_NO_MEMORY;
1925 }
1926 return(NULL);
1927 }
1928 memset(input, 0, sizeof(xmlParserInput));
1929 input->line = 1;
1930 input->col = 1;
1931 input->standalone = -1;
1932 return(input);
1933}
1934
1935/**
1936 * xmlNewIOInputStream:
1937 * @ctxt: an XML parser context
1938 * @input: an I/O Input
1939 * @enc: the charset encoding if known
1940 *
1941 * Create a new input stream structure encapsulating the @input into
1942 * a stream suitable for the parser.
1943 *
1944 * Returns the new input stream or NULL
1945 */
1946xmlParserInputPtr
1947xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1948 xmlCharEncoding enc) {
1949 xmlParserInputPtr inputStream;
1950
1951 if (xmlParserDebugEntities)
1952 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1953 inputStream = xmlNewInputStream(ctxt);
1954 if (inputStream == NULL) {
1955 return(NULL);
1956 }
1957 inputStream->filename = NULL;
1958 inputStream->buf = input;
1959 inputStream->base = inputStream->buf->buffer->content;
1960 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001961 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001962 if (enc != XML_CHAR_ENCODING_NONE) {
1963 xmlSwitchEncoding(ctxt, enc);
1964 }
1965
1966 return(inputStream);
1967}
1968
1969/**
1970 * xmlNewEntityInputStream:
1971 * @ctxt: an XML parser context
1972 * @entity: an Entity pointer
1973 *
1974 * Create a new input stream based on an xmlEntityPtr
1975 *
1976 * Returns the new input stream or NULL
1977 */
1978xmlParserInputPtr
1979xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1980 xmlParserInputPtr input;
1981
1982 if (entity == NULL) {
1983 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "internal: xmlNewEntityInputStream entity = NULL\n");
1987 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1988 return(NULL);
1989 }
1990 if (xmlParserDebugEntities)
1991 xmlGenericError(xmlGenericErrorContext,
1992 "new input from entity: %s\n", entity->name);
1993 if (entity->content == NULL) {
1994 switch (entity->etype) {
1995 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1996 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1998 ctxt->sax->error(ctxt->userData,
1999 "xmlNewEntityInputStream unparsed entity !\n");
2000 break;
2001 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2002 case XML_EXTERNAL_PARAMETER_ENTITY:
2003 return(xmlLoadExternalEntity((char *) entity->URI,
2004 (char *) entity->ExternalID, ctxt));
2005 case XML_INTERNAL_GENERAL_ENTITY:
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData,
2008 "Internal entity %s without content !\n", entity->name);
2009 break;
2010 case XML_INTERNAL_PARAMETER_ENTITY:
2011 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
2014 "Internal parameter entity %s without content !\n", entity->name);
2015 break;
2016 case XML_INTERNAL_PREDEFINED_ENTITY:
2017 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2019 ctxt->sax->error(ctxt->userData,
2020 "Predefined entity %s without content !\n", entity->name);
2021 break;
2022 }
2023 return(NULL);
2024 }
2025 input = xmlNewInputStream(ctxt);
2026 if (input == NULL) {
2027 return(NULL);
2028 }
2029 input->filename = (char *) entity->URI;
2030 input->base = entity->content;
2031 input->cur = entity->content;
2032 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002033 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002034 return(input);
2035}
2036
2037/**
2038 * xmlNewStringInputStream:
2039 * @ctxt: an XML parser context
2040 * @buffer: an memory buffer
2041 *
2042 * Create a new input stream based on a memory buffer.
2043 * Returns the new input stream
2044 */
2045xmlParserInputPtr
2046xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2047 xmlParserInputPtr input;
2048
2049 if (buffer == NULL) {
2050 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2052 ctxt->sax->error(ctxt->userData,
2053 "internal: xmlNewStringInputStream string = NULL\n");
2054 return(NULL);
2055 }
2056 if (xmlParserDebugEntities)
2057 xmlGenericError(xmlGenericErrorContext,
2058 "new fixed input: %.30s\n", buffer);
2059 input = xmlNewInputStream(ctxt);
2060 if (input == NULL) {
2061 return(NULL);
2062 }
2063 input->base = buffer;
2064 input->cur = buffer;
2065 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002066 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002067 return(input);
2068}
2069
2070/**
2071 * xmlNewInputFromFile:
2072 * @ctxt: an XML parser context
2073 * @filename: the filename to use as entity
2074 *
2075 * Create a new input stream based on a file.
2076 *
2077 * Returns the new input stream or NULL in case of error
2078 */
2079xmlParserInputPtr
2080xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2081 xmlParserInputBufferPtr buf;
2082 xmlParserInputPtr inputStream;
2083 char *directory = NULL;
2084 xmlChar *URI = NULL;
2085
2086 if (xmlParserDebugEntities)
2087 xmlGenericError(xmlGenericErrorContext,
2088 "new input from file: %s\n", filename);
2089 if (ctxt == NULL) return(NULL);
2090 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2091 if (buf == NULL)
2092 return(NULL);
2093
2094 URI = xmlStrdup((xmlChar *) filename);
2095 directory = xmlParserGetDirectory((const char *) URI);
2096
2097 inputStream = xmlNewInputStream(ctxt);
2098 if (inputStream == NULL) {
2099 if (directory != NULL) xmlFree((char *) directory);
2100 if (URI != NULL) xmlFree((char *) URI);
2101 return(NULL);
2102 }
2103
2104 inputStream->filename = (const char *) URI;
2105 inputStream->directory = directory;
2106 inputStream->buf = buf;
2107
2108 inputStream->base = inputStream->buf->buffer->content;
2109 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002110 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002111 if ((ctxt->directory == NULL) && (directory != NULL))
2112 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2113 return(inputStream);
2114}
2115
2116/************************************************************************
2117 * *
2118 * Commodity functions to handle parser contexts *
2119 * *
2120 ************************************************************************/
2121
2122/**
2123 * xmlInitParserCtxt:
2124 * @ctxt: an XML parser context
2125 *
2126 * Initialize a parser context
2127 */
2128
2129void
2130xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2131{
2132 xmlSAXHandler *sax;
2133
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002134 if(ctxt==NULL) {
2135 xmlGenericError(xmlGenericErrorContext,
2136 "xmlInitParserCtxt: NULL context given\n");
2137 return;
2138 }
2139
Owen Taylor3473f882001-02-23 17:55:21 +00002140 xmlDefaultSAXHandlerInit();
2141
2142 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2143 if (sax == NULL) {
2144 xmlGenericError(xmlGenericErrorContext,
2145 "xmlInitParserCtxt: out of memory\n");
2146 }
2147 else
2148 memset(sax, 0, sizeof(xmlSAXHandler));
2149
2150 /* Allocate the Input stack */
2151 ctxt->inputTab = (xmlParserInputPtr *)
2152 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2153 if (ctxt->inputTab == NULL) {
2154 xmlGenericError(xmlGenericErrorContext,
2155 "xmlInitParserCtxt: out of memory\n");
2156 ctxt->inputNr = 0;
2157 ctxt->inputMax = 0;
2158 ctxt->input = NULL;
2159 return;
2160 }
2161 ctxt->inputNr = 0;
2162 ctxt->inputMax = 5;
2163 ctxt->input = NULL;
2164
2165 ctxt->version = NULL;
2166 ctxt->encoding = NULL;
2167 ctxt->standalone = -1;
2168 ctxt->hasExternalSubset = 0;
2169 ctxt->hasPErefs = 0;
2170 ctxt->html = 0;
2171 ctxt->external = 0;
2172 ctxt->instate = XML_PARSER_START;
2173 ctxt->token = 0;
2174 ctxt->directory = NULL;
2175
2176 /* Allocate the Node stack */
2177 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2178 if (ctxt->nodeTab == NULL) {
2179 xmlGenericError(xmlGenericErrorContext,
2180 "xmlInitParserCtxt: out of memory\n");
2181 ctxt->nodeNr = 0;
2182 ctxt->nodeMax = 0;
2183 ctxt->node = NULL;
2184 ctxt->inputNr = 0;
2185 ctxt->inputMax = 0;
2186 ctxt->input = NULL;
2187 return;
2188 }
2189 ctxt->nodeNr = 0;
2190 ctxt->nodeMax = 10;
2191 ctxt->node = NULL;
2192
2193 /* Allocate the Name stack */
2194 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2195 if (ctxt->nameTab == NULL) {
2196 xmlGenericError(xmlGenericErrorContext,
2197 "xmlInitParserCtxt: out of memory\n");
2198 ctxt->nodeNr = 0;
2199 ctxt->nodeMax = 0;
2200 ctxt->node = NULL;
2201 ctxt->inputNr = 0;
2202 ctxt->inputMax = 0;
2203 ctxt->input = NULL;
2204 ctxt->nameNr = 0;
2205 ctxt->nameMax = 0;
2206 ctxt->name = NULL;
2207 return;
2208 }
2209 ctxt->nameNr = 0;
2210 ctxt->nameMax = 10;
2211 ctxt->name = NULL;
2212
2213 /* Allocate the space stack */
2214 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2215 if (ctxt->spaceTab == NULL) {
2216 xmlGenericError(xmlGenericErrorContext,
2217 "xmlInitParserCtxt: out of memory\n");
2218 ctxt->nodeNr = 0;
2219 ctxt->nodeMax = 0;
2220 ctxt->node = NULL;
2221 ctxt->inputNr = 0;
2222 ctxt->inputMax = 0;
2223 ctxt->input = NULL;
2224 ctxt->nameNr = 0;
2225 ctxt->nameMax = 0;
2226 ctxt->name = NULL;
2227 ctxt->spaceNr = 0;
2228 ctxt->spaceMax = 0;
2229 ctxt->space = NULL;
2230 return;
2231 }
2232 ctxt->spaceNr = 1;
2233 ctxt->spaceMax = 10;
2234 ctxt->spaceTab[0] = -1;
2235 ctxt->space = &ctxt->spaceTab[0];
2236
Daniel Veillard14be0a12001-03-03 18:50:55 +00002237 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002238 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002239
Owen Taylor3473f882001-02-23 17:55:21 +00002240 ctxt->userData = ctxt;
2241 ctxt->myDoc = NULL;
2242 ctxt->wellFormed = 1;
2243 ctxt->valid = 1;
2244 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2245 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2246 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002247 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002248 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002249 if (ctxt->keepBlanks == 0)
2250 sax->ignorableWhitespace = ignorableWhitespace;
2251
Owen Taylor3473f882001-02-23 17:55:21 +00002252 ctxt->vctxt.userData = ctxt;
2253 if (ctxt->validate) {
2254 ctxt->vctxt.error = xmlParserValidityError;
2255 if (xmlGetWarningsDefaultValue == 0)
2256 ctxt->vctxt.warning = NULL;
2257 else
2258 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002259 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002260 } else {
2261 ctxt->vctxt.error = NULL;
2262 ctxt->vctxt.warning = NULL;
2263 }
2264 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2265 ctxt->record_info = 0;
2266 ctxt->nbChars = 0;
2267 ctxt->checkIndex = 0;
2268 ctxt->inSubset = 0;
2269 ctxt->errNo = XML_ERR_OK;
2270 ctxt->depth = 0;
2271 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002272 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002273 xmlInitNodeInfoSeq(&ctxt->node_seq);
2274}
2275
2276/**
2277 * xmlFreeParserCtxt:
2278 * @ctxt: an XML parser context
2279 *
2280 * Free all the memory used by a parser context. However the parsed
2281 * document in ctxt->myDoc is not freed.
2282 */
2283
2284void
2285xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2286{
2287 xmlParserInputPtr input;
2288 xmlChar *oldname;
2289
2290 if (ctxt == NULL) return;
2291
2292 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2293 xmlFreeInputStream(input);
2294 }
2295 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2296 xmlFree(oldname);
2297 }
2298 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2299 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2300 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2301 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2302 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2303 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2304 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2305 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2306 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2308 xmlFree(ctxt->sax);
2309 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002310 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002311#ifdef LIBXML_CATALOG_ENABLED
2312 if (ctxt->catalogs != NULL)
2313 xmlCatalogFreeLocal(ctxt->catalogs);
2314#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002315 xmlFree(ctxt);
2316}
2317
2318/**
2319 * xmlNewParserCtxt:
2320 *
2321 * Allocate and initialize a new parser context.
2322 *
2323 * Returns the xmlParserCtxtPtr or NULL
2324 */
2325
2326xmlParserCtxtPtr
2327xmlNewParserCtxt()
2328{
2329 xmlParserCtxtPtr ctxt;
2330
2331 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2332 if (ctxt == NULL) {
2333 xmlGenericError(xmlGenericErrorContext,
2334 "xmlNewParserCtxt : cannot allocate context\n");
2335 perror("malloc");
2336 return(NULL);
2337 }
2338 memset(ctxt, 0, sizeof(xmlParserCtxt));
2339 xmlInitParserCtxt(ctxt);
2340 return(ctxt);
2341}
2342
2343/************************************************************************
2344 * *
2345 * Handling of node information *
2346 * *
2347 ************************************************************************/
2348
2349/**
2350 * xmlClearParserCtxt:
2351 * @ctxt: an XML parser context
2352 *
2353 * Clear (release owned resources) and reinitialize a parser context
2354 */
2355
2356void
2357xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2358{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002359 if (ctxt==NULL)
2360 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002361 xmlClearNodeInfoSeq(&ctxt->node_seq);
2362 xmlInitParserCtxt(ctxt);
2363}
2364
2365/**
2366 * xmlParserFindNodeInfo:
2367 * @ctxt: an XML parser context
2368 * @node: an XML node within the tree
2369 *
2370 * Find the parser node info struct for a given node
2371 *
2372 * Returns an xmlParserNodeInfo block pointer or NULL
2373 */
2374const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2375 const xmlNode* node)
2376{
2377 unsigned long pos;
2378
2379 /* Find position where node should be at */
2380 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002381 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002382 return &ctx->node_seq.buffer[pos];
2383 else
2384 return NULL;
2385}
2386
2387
2388/**
2389 * xmlInitNodeInfoSeq:
2390 * @seq: a node info sequence pointer
2391 *
2392 * -- Initialize (set to initial state) node info sequence
2393 */
2394void
2395xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2396{
2397 seq->length = 0;
2398 seq->maximum = 0;
2399 seq->buffer = NULL;
2400}
2401
2402/**
2403 * xmlClearNodeInfoSeq:
2404 * @seq: a node info sequence pointer
2405 *
2406 * -- Clear (release memory and reinitialize) node
2407 * info sequence
2408 */
2409void
2410xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2411{
2412 if ( seq->buffer != NULL )
2413 xmlFree(seq->buffer);
2414 xmlInitNodeInfoSeq(seq);
2415}
2416
2417
2418/**
2419 * xmlParserFindNodeInfoIndex:
2420 * @seq: a node info sequence pointer
2421 * @node: an XML node pointer
2422 *
2423 *
2424 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2425 * the given node is or should be at in a sorted sequence
2426 *
2427 * Returns a long indicating the position of the record
2428 */
2429unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2430 const xmlNode* node)
2431{
2432 unsigned long upper, lower, middle;
2433 int found = 0;
2434
2435 /* Do a binary search for the key */
2436 lower = 1;
2437 upper = seq->length;
2438 middle = 0;
2439 while ( lower <= upper && !found) {
2440 middle = lower + (upper - lower) / 2;
2441 if ( node == seq->buffer[middle - 1].node )
2442 found = 1;
2443 else if ( node < seq->buffer[middle - 1].node )
2444 upper = middle - 1;
2445 else
2446 lower = middle + 1;
2447 }
2448
2449 /* Return position */
2450 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2451 return middle;
2452 else
2453 return middle - 1;
2454}
2455
2456
2457/**
2458 * xmlParserAddNodeInfo:
2459 * @ctxt: an XML parser context
2460 * @info: a node info sequence pointer
2461 *
2462 * Insert node info record into the sorted sequence
2463 */
2464void
2465xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2466 const xmlParserNodeInfo* info)
2467{
2468 unsigned long pos;
2469 static unsigned int block_size = 5;
2470
2471 /* Find pos and check to see if node is already in the sequence */
2472 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2473 if ( pos < ctxt->node_seq.length
2474 && ctxt->node_seq.buffer[pos].node == info->node ) {
2475 ctxt->node_seq.buffer[pos] = *info;
2476 }
2477
2478 /* Otherwise, we need to add new node to buffer */
2479 else {
2480 /* Expand buffer by 5 if needed */
2481 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2482 xmlParserNodeInfo* tmp_buffer;
2483 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2484 *(ctxt->node_seq.maximum + block_size));
2485
2486 if ( ctxt->node_seq.buffer == NULL )
2487 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2488 else
2489 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2490
2491 if ( tmp_buffer == NULL ) {
2492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2493 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2494 ctxt->errNo = XML_ERR_NO_MEMORY;
2495 return;
2496 }
2497 ctxt->node_seq.buffer = tmp_buffer;
2498 ctxt->node_seq.maximum += block_size;
2499 }
2500
2501 /* If position is not at end, move elements out of the way */
2502 if ( pos != ctxt->node_seq.length ) {
2503 unsigned long i;
2504
2505 for ( i = ctxt->node_seq.length; i > pos; i-- )
2506 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2507 }
2508
2509 /* Copy element and increase length */
2510 ctxt->node_seq.buffer[pos] = *info;
2511 ctxt->node_seq.length++;
2512 }
2513}
2514
2515/************************************************************************
2516 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002517 * Default settings *
2518 * *
2519 ************************************************************************/
2520/**
2521 * xmlPedanticParserDefault:
2522 * @val: int 0 or 1
2523 *
2524 * Set and return the previous value for enabling pedantic warnings.
2525 *
2526 * Returns the last value for 0 for no substitution, 1 for substitution.
2527 */
2528
2529int
2530xmlPedanticParserDefault(int val) {
2531 int old = xmlPedanticParserDefaultValue;
2532
2533 xmlPedanticParserDefaultValue = val;
2534 return(old);
2535}
2536
2537/**
2538 * xmlLineNumbersDefault:
2539 * @val: int 0 or 1
2540 *
2541 * Set and return the previous value for enabling line numbers in elements
2542 * contents. This may break on old application and is turned off by default.
2543 *
2544 * Returns the last value for 0 for no substitution, 1 for substitution.
2545 */
2546
2547int
2548xmlLineNumbersDefault(int val) {
2549 int old = xmlLineNumbersDefaultValue;
2550
2551 xmlLineNumbersDefaultValue = val;
2552 return(old);
2553}
2554
2555/**
2556 * xmlSubstituteEntitiesDefault:
2557 * @val: int 0 or 1
2558 *
2559 * Set and return the previous value for default entity support.
2560 * Initially the parser always keep entity references instead of substituting
2561 * entity values in the output. This function has to be used to change the
2562 * default parser behaviour
2563 * SAX::subtituteEntities() has to be used for changing that on a file by
2564 * file basis.
2565 *
2566 * Returns the last value for 0 for no substitution, 1 for substitution.
2567 */
2568
2569int
2570xmlSubstituteEntitiesDefault(int val) {
2571 int old = xmlSubstituteEntitiesDefaultValue;
2572
2573 xmlSubstituteEntitiesDefaultValue = val;
2574 return(old);
2575}
2576
2577/**
2578 * xmlKeepBlanksDefault:
2579 * @val: int 0 or 1
2580 *
2581 * Set and return the previous value for default blanks text nodes support.
2582 * The 1.x version of the parser used an heuristic to try to detect
2583 * ignorable white spaces. As a result the SAX callback was generating
2584 * ignorableWhitespace() callbacks instead of characters() one, and when
2585 * using the DOM output text nodes containing those blanks were not generated.
2586 * The 2.x and later version will switch to the XML standard way and
2587 * ignorableWhitespace() are only generated when running the parser in
2588 * validating mode and when the current element doesn't allow CDATA or
2589 * mixed content.
2590 * This function is provided as a way to force the standard behaviour
2591 * on 1.X libs and to switch back to the old mode for compatibility when
2592 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2593 * by using xmlIsBlankNode() commodity function to detect the "empty"
2594 * nodes generated.
2595 * This value also affect autogeneration of indentation when saving code
2596 * if blanks sections are kept, indentation is not generated.
2597 *
2598 * Returns the last value for 0 for no substitution, 1 for substitution.
2599 */
2600
2601int
2602xmlKeepBlanksDefault(int val) {
2603 int old = xmlKeepBlanksDefaultValue;
2604
2605 xmlKeepBlanksDefaultValue = val;
2606 xmlIndentTreeOutput = !val;
2607 return(old);
2608}
2609
2610/************************************************************************
2611 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002612 * Deprecated functions kept for compatibility *
2613 * *
2614 ************************************************************************/
2615
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002616/**
2617 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002618 * @lang: pointer to the string value
2619 *
2620 * Checks that the value conforms to the LanguageID production:
2621 *
2622 * NOTE: this is somewhat deprecated, those productions were removed from
2623 * the XML Second edition.
2624 *
2625 * [33] LanguageID ::= Langcode ('-' Subcode)*
2626 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2627 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2628 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2629 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2630 * [38] Subcode ::= ([a-z] | [A-Z])+
2631 *
2632 * Returns 1 if correct 0 otherwise
2633 **/
2634int
2635xmlCheckLanguageID(const xmlChar *lang) {
2636 const xmlChar *cur = lang;
2637
2638 if (cur == NULL)
2639 return(0);
2640 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2641 ((cur[0] == 'I') && (cur[1] == '-'))) {
2642 /*
2643 * IANA code
2644 */
2645 cur += 2;
2646 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2647 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2648 cur++;
2649 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2650 ((cur[0] == 'X') && (cur[1] == '-'))) {
2651 /*
2652 * User code
2653 */
2654 cur += 2;
2655 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2656 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2657 cur++;
2658 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2659 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2660 /*
2661 * ISO639
2662 */
2663 cur++;
2664 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2665 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2666 cur++;
2667 else
2668 return(0);
2669 } else
2670 return(0);
2671 while (cur[0] != 0) { /* non input consuming */
2672 if (cur[0] != '-')
2673 return(0);
2674 cur++;
2675 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2676 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2677 cur++;
2678 else
2679 return(0);
2680 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2681 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2682 cur++;
2683 }
2684 return(1);
2685}
2686
2687/**
2688 * xmlDecodeEntities:
2689 * @ctxt: the parser context
2690 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2691 * @len: the len to decode (in bytes !), -1 for no size limit
2692 * @end: an end marker xmlChar, 0 if none
2693 * @end2: an end marker xmlChar, 0 if none
2694 * @end3: an end marker xmlChar, 0 if none
2695 *
2696 * This function is deprecated, we now always process entities content
2697 * through xmlStringDecodeEntities
2698 *
2699 * TODO: remove it in next major release.
2700 *
2701 * [67] Reference ::= EntityRef | CharRef
2702 *
2703 * [69] PEReference ::= '%' Name ';'
2704 *
2705 * Returns A newly allocated string with the substitution done. The caller
2706 * must deallocate it !
2707 */
2708xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002709xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2710 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002711#if 0
2712 xmlChar *buffer = NULL;
2713 unsigned int buffer_size = 0;
2714 unsigned int nbchars = 0;
2715
2716 xmlChar *current = NULL;
2717 xmlEntityPtr ent;
2718 unsigned int max = (unsigned int) len;
2719 int c,l;
2720#endif
2721
2722 static int deprecated = 0;
2723 if (!deprecated) {
2724 xmlGenericError(xmlGenericErrorContext,
2725 "xmlDecodeEntities() deprecated function reached\n");
2726 deprecated = 1;
2727 }
2728
2729#if 0
2730 if (ctxt->depth > 40) {
2731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2732 ctxt->sax->error(ctxt->userData,
2733 "Detected entity reference loop\n");
2734 ctxt->wellFormed = 0;
2735 ctxt->disableSAX = 1;
2736 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2737 return(NULL);
2738 }
2739
2740 /*
2741 * allocate a translation buffer.
2742 */
2743 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2744 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2745 if (buffer == NULL) {
2746 perror("xmlDecodeEntities: malloc failed");
2747 return(NULL);
2748 }
2749
2750 /*
2751 * Ok loop until we reach one of the ending char or a size limit.
2752 */
2753 GROW;
2754 c = CUR_CHAR(l);
2755 while ((nbchars < max) && (c != end) && /* NOTUSED */
2756 (c != end2) && (c != end3)) {
2757 GROW;
2758 if (c == 0) break;
2759 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2760 int val = xmlParseCharRef(ctxt);
2761 COPY_BUF(0,buffer,nbchars,val);
2762 NEXTL(l);
2763 } else if ((c == '&') && (ctxt->token != '&') &&
2764 (what & XML_SUBSTITUTE_REF)) {
2765 if (xmlParserDebugEntities)
2766 xmlGenericError(xmlGenericErrorContext,
2767 "decoding Entity Reference\n");
2768 ent = xmlParseEntityRef(ctxt);
2769 if ((ent != NULL) &&
2770 (ctxt->replaceEntities != 0)) {
2771 current = ent->content;
2772 while (*current != 0) { /* non input consuming loop */
2773 buffer[nbchars++] = *current++;
2774 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2775 growBuffer(buffer);
2776 }
2777 }
2778 } else if (ent != NULL) {
2779 const xmlChar *cur = ent->name;
2780
2781 buffer[nbchars++] = '&';
2782 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2783 growBuffer(buffer);
2784 }
2785 while (*cur != 0) { /* non input consuming loop */
2786 buffer[nbchars++] = *cur++;
2787 }
2788 buffer[nbchars++] = ';';
2789 }
2790 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2791 /*
2792 * a PEReference induce to switch the entity flow,
2793 * we break here to flush the current set of chars
2794 * parsed if any. We will be called back later.
2795 */
2796 if (xmlParserDebugEntities)
2797 xmlGenericError(xmlGenericErrorContext,
2798 "decoding PE Reference\n");
2799 if (nbchars != 0) break;
2800
2801 xmlParsePEReference(ctxt);
2802
2803 /*
2804 * Pop-up of finished entities.
2805 */
2806 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2807 xmlPopInput(ctxt);
2808
2809 break;
2810 } else {
2811 COPY_BUF(l,buffer,nbchars,c);
2812 NEXTL(l);
2813 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2814 growBuffer(buffer);
2815 }
2816 }
2817 c = CUR_CHAR(l);
2818 }
2819 buffer[nbchars++] = 0;
2820 return(buffer);
2821#endif
2822 return(NULL);
2823}
2824
2825/**
2826 * xmlNamespaceParseNCName:
2827 * @ctxt: an XML parser context
2828 *
2829 * parse an XML namespace name.
2830 *
2831 * TODO: this seems not in use anymore, the namespace handling is done on
2832 * top of the SAX interfaces, i.e. not on raw input.
2833 *
2834 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2835 *
2836 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2837 * CombiningChar | Extender
2838 *
2839 * Returns the namespace name or NULL
2840 */
2841
2842xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002843xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002844#if 0
2845 xmlChar buf[XML_MAX_NAMELEN + 5];
2846 int len = 0, l;
2847 int cur = CUR_CHAR(l);
2848#endif
2849
2850 static int deprecated = 0;
2851 if (!deprecated) {
2852 xmlGenericError(xmlGenericErrorContext,
2853 "xmlNamespaceParseNCName() deprecated function reached\n");
2854 deprecated = 1;
2855 }
2856
2857#if 0
2858 /* load first the value of the char !!! */
2859 GROW;
2860 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2861
2862xmlGenericError(xmlGenericErrorContext,
2863 "xmlNamespaceParseNCName: reached loop 3\n");
2864 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2865 (cur == '.') || (cur == '-') ||
2866 (cur == '_') ||
2867 (IS_COMBINING(cur)) ||
2868 (IS_EXTENDER(cur))) {
2869 COPY_BUF(l,buf,len,cur);
2870 NEXTL(l);
2871 cur = CUR_CHAR(l);
2872 if (len >= XML_MAX_NAMELEN) {
2873 xmlGenericError(xmlGenericErrorContext,
2874 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2875 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2876 (cur == '.') || (cur == '-') ||
2877 (cur == '_') ||
2878 (IS_COMBINING(cur)) ||
2879 (IS_EXTENDER(cur))) {
2880 NEXTL(l);
2881 cur = CUR_CHAR(l);
2882 }
2883 break;
2884 }
2885 }
2886 return(xmlStrndup(buf, len));
2887#endif
2888 return(NULL);
2889}
2890
2891/**
2892 * xmlNamespaceParseQName:
2893 * @ctxt: an XML parser context
2894 * @prefix: a xmlChar **
2895 *
2896 * TODO: this seems not in use anymore, the namespace handling is done on
2897 * top of the SAX interfaces, i.e. not on raw input.
2898 *
2899 * parse an XML qualified name
2900 *
2901 * [NS 5] QName ::= (Prefix ':')? LocalPart
2902 *
2903 * [NS 6] Prefix ::= NCName
2904 *
2905 * [NS 7] LocalPart ::= NCName
2906 *
2907 * Returns the local part, and prefix is updated
2908 * to get the Prefix if any.
2909 */
2910
2911xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002912xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002913
2914 static int deprecated = 0;
2915 if (!deprecated) {
2916 xmlGenericError(xmlGenericErrorContext,
2917 "xmlNamespaceParseQName() deprecated function reached\n");
2918 deprecated = 1;
2919 }
2920
2921#if 0
2922 xmlChar *ret = NULL;
2923
2924 *prefix = NULL;
2925 ret = xmlNamespaceParseNCName(ctxt);
2926 if (RAW == ':') {
2927 *prefix = ret;
2928 NEXT;
2929 ret = xmlNamespaceParseNCName(ctxt);
2930 }
2931
2932 return(ret);
2933#endif
2934 return(NULL);
2935}
2936
2937/**
2938 * xmlNamespaceParseNSDef:
2939 * @ctxt: an XML parser context
2940 *
2941 * parse a namespace prefix declaration
2942 *
2943 * TODO: this seems not in use anymore, the namespace handling is done on
2944 * top of the SAX interfaces, i.e. not on raw input.
2945 *
2946 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2947 *
2948 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2949 *
2950 * Returns the namespace name
2951 */
2952
2953xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002954xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002955 static int deprecated = 0;
2956 if (!deprecated) {
2957 xmlGenericError(xmlGenericErrorContext,
2958 "xmlNamespaceParseNSDef() deprecated function reached\n");
2959 deprecated = 1;
2960 }
2961 return(NULL);
2962#if 0
2963 xmlChar *name = NULL;
2964
2965 if ((RAW == 'x') && (NXT(1) == 'm') &&
2966 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2967 (NXT(4) == 's')) {
2968 SKIP(5);
2969 if (RAW == ':') {
2970 NEXT;
2971 name = xmlNamespaceParseNCName(ctxt);
2972 }
2973 }
2974 return(name);
2975#endif
2976}
2977
2978/**
2979 * xmlParseQuotedString:
2980 * @ctxt: an XML parser context
2981 *
2982 * Parse and return a string between quotes or doublequotes
2983 *
2984 * TODO: Deprecated, to be removed at next drop of binary compatibility
2985 *
2986 * Returns the string parser or NULL.
2987 */
2988xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002989xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002990 static int deprecated = 0;
2991 if (!deprecated) {
2992 xmlGenericError(xmlGenericErrorContext,
2993 "xmlParseQuotedString() deprecated function reached\n");
2994 deprecated = 1;
2995 }
2996 return(NULL);
2997
2998#if 0
2999 xmlChar *buf = NULL;
3000 int len = 0,l;
3001 int size = XML_PARSER_BUFFER_SIZE;
3002 int c;
3003
3004 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3005 if (buf == NULL) {
3006 xmlGenericError(xmlGenericErrorContext,
3007 "malloc of %d byte failed\n", size);
3008 return(NULL);
3009 }
3010xmlGenericError(xmlGenericErrorContext,
3011 "xmlParseQuotedString: reached loop 4\n");
3012 if (RAW == '"') {
3013 NEXT;
3014 c = CUR_CHAR(l);
3015 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3016 if (len + 5 >= size) {
3017 size *= 2;
3018 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3019 if (buf == NULL) {
3020 xmlGenericError(xmlGenericErrorContext,
3021 "realloc of %d byte failed\n", size);
3022 return(NULL);
3023 }
3024 }
3025 COPY_BUF(l,buf,len,c);
3026 NEXTL(l);
3027 c = CUR_CHAR(l);
3028 }
3029 if (c != '"') {
3030 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3032 ctxt->sax->error(ctxt->userData,
3033 "String not closed \"%.50s\"\n", buf);
3034 ctxt->wellFormed = 0;
3035 ctxt->disableSAX = 1;
3036 } else {
3037 NEXT;
3038 }
3039 } else if (RAW == '\''){
3040 NEXT;
3041 c = CUR;
3042 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3043 if (len + 1 >= size) {
3044 size *= 2;
3045 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3046 if (buf == NULL) {
3047 xmlGenericError(xmlGenericErrorContext,
3048 "realloc of %d byte failed\n", size);
3049 return(NULL);
3050 }
3051 }
3052 buf[len++] = c;
3053 NEXT;
3054 c = CUR;
3055 }
3056 if (RAW != '\'') {
3057 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3059 ctxt->sax->error(ctxt->userData,
3060 "String not closed \"%.50s\"\n", buf);
3061 ctxt->wellFormed = 0;
3062 ctxt->disableSAX = 1;
3063 } else {
3064 NEXT;
3065 }
3066 }
3067 return(buf);
3068#endif
3069}
3070
3071/**
3072 * xmlParseNamespace:
3073 * @ctxt: an XML parser context
3074 *
3075 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3076 *
3077 * This is what the older xml-name Working Draft specified, a bunch of
3078 * other stuff may still rely on it, so support is still here as
3079 * if it was declared on the root of the Tree:-(
3080 *
3081 * TODO: remove from library
3082 *
3083 * To be removed at next drop of binary compatibility
3084 */
3085
3086void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003087xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003088 static int deprecated = 0;
3089 if (!deprecated) {
3090 xmlGenericError(xmlGenericErrorContext,
3091 "xmlParseNamespace() deprecated function reached\n");
3092 deprecated = 1;
3093 }
3094
3095#if 0
3096 xmlChar *href = NULL;
3097 xmlChar *prefix = NULL;
3098 int garbage = 0;
3099
3100 /*
3101 * We just skipped "namespace" or "xml:namespace"
3102 */
3103 SKIP_BLANKS;
3104
3105xmlGenericError(xmlGenericErrorContext,
3106 "xmlParseNamespace: reached loop 5\n");
3107 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3108 /*
3109 * We can have "ns" or "prefix" attributes
3110 * Old encoding as 'href' or 'AS' attributes is still supported
3111 */
3112 if ((RAW == 'n') && (NXT(1) == 's')) {
3113 garbage = 0;
3114 SKIP(2);
3115 SKIP_BLANKS;
3116
3117 if (RAW != '=') continue;
3118 NEXT;
3119 SKIP_BLANKS;
3120
3121 href = xmlParseQuotedString(ctxt);
3122 SKIP_BLANKS;
3123 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3124 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3125 garbage = 0;
3126 SKIP(4);
3127 SKIP_BLANKS;
3128
3129 if (RAW != '=') continue;
3130 NEXT;
3131 SKIP_BLANKS;
3132
3133 href = xmlParseQuotedString(ctxt);
3134 SKIP_BLANKS;
3135 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3136 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3137 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3138 garbage = 0;
3139 SKIP(6);
3140 SKIP_BLANKS;
3141
3142 if (RAW != '=') continue;
3143 NEXT;
3144 SKIP_BLANKS;
3145
3146 prefix = xmlParseQuotedString(ctxt);
3147 SKIP_BLANKS;
3148 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3149 garbage = 0;
3150 SKIP(2);
3151 SKIP_BLANKS;
3152
3153 if (RAW != '=') continue;
3154 NEXT;
3155 SKIP_BLANKS;
3156
3157 prefix = xmlParseQuotedString(ctxt);
3158 SKIP_BLANKS;
3159 } else if ((RAW == '?') && (NXT(1) == '>')) {
3160 garbage = 0;
3161 NEXT;
3162 } else {
3163 /*
3164 * Found garbage when parsing the namespace
3165 */
3166 if (!garbage) {
3167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3168 ctxt->sax->error(ctxt->userData,
3169 "xmlParseNamespace found garbage\n");
3170 }
3171 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3172 ctxt->wellFormed = 0;
3173 ctxt->disableSAX = 1;
3174 NEXT;
3175 }
3176 }
3177
3178 MOVETO_ENDTAG(CUR_PTR);
3179 NEXT;
3180
3181 /*
3182 * Register the DTD.
3183 if (href != NULL)
3184 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3185 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3186 */
3187
3188 if (prefix != NULL) xmlFree(prefix);
3189 if (href != NULL) xmlFree(href);
3190#endif
3191}
3192
3193/**
3194 * xmlScanName:
3195 * @ctxt: an XML parser context
3196 *
3197 * Trickery: parse an XML name but without consuming the input flow
3198 * Needed for rollback cases. Used only when parsing entities references.
3199 *
3200 * TODO: seems deprecated now, only used in the default part of
3201 * xmlParserHandleReference
3202 *
3203 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3204 * CombiningChar | Extender
3205 *
3206 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3207 *
3208 * [6] Names ::= Name (S Name)*
3209 *
3210 * Returns the Name parsed or NULL
3211 */
3212
3213xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003214xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003215 static int deprecated = 0;
3216 if (!deprecated) {
3217 xmlGenericError(xmlGenericErrorContext,
3218 "xmlScanName() deprecated function reached\n");
3219 deprecated = 1;
3220 }
3221 return(NULL);
3222
3223#if 0
3224 xmlChar buf[XML_MAX_NAMELEN];
3225 int len = 0;
3226
3227 GROW;
3228 if (!IS_LETTER(RAW) && (RAW != '_') &&
3229 (RAW != ':')) {
3230 return(NULL);
3231 }
3232
3233
3234 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3235 (NXT(len) == '.') || (NXT(len) == '-') ||
3236 (NXT(len) == '_') || (NXT(len) == ':') ||
3237 (IS_COMBINING(NXT(len))) ||
3238 (IS_EXTENDER(NXT(len)))) {
3239 GROW;
3240 buf[len] = NXT(len);
3241 len++;
3242 if (len >= XML_MAX_NAMELEN) {
3243 xmlGenericError(xmlGenericErrorContext,
3244 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3245 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3246 (IS_DIGIT(NXT(len))) ||
3247 (NXT(len) == '.') || (NXT(len) == '-') ||
3248 (NXT(len) == '_') || (NXT(len) == ':') ||
3249 (IS_COMBINING(NXT(len))) ||
3250 (IS_EXTENDER(NXT(len))))
3251 len++;
3252 break;
3253 }
3254 }
3255 return(xmlStrndup(buf, len));
3256#endif
3257}
3258
3259/**
3260 * xmlParserHandleReference:
3261 * @ctxt: the parser context
3262 *
3263 * TODO: Remove, now deprecated ... the test is done directly in the
3264 * content parsing
3265 * routines.
3266 *
3267 * [67] Reference ::= EntityRef | CharRef
3268 *
3269 * [68] EntityRef ::= '&' Name ';'
3270 *
3271 * [ WFC: Entity Declared ]
3272 * the Name given in the entity reference must match that in an entity
3273 * declaration, except that well-formed documents need not declare any
3274 * of the following entities: amp, lt, gt, apos, quot.
3275 *
3276 * [ WFC: Parsed Entity ]
3277 * An entity reference must not contain the name of an unparsed entity
3278 *
3279 * [66] CharRef ::= '&#' [0-9]+ ';' |
3280 * '&#x' [0-9a-fA-F]+ ';'
3281 *
3282 * A PEReference may have been detectect in the current input stream
3283 * the handling is done accordingly to
3284 * http://www.w3.org/TR/REC-xml#entproc
3285 */
3286void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003287xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003288 static int deprecated = 0;
3289 if (!deprecated) {
3290 xmlGenericError(xmlGenericErrorContext,
3291 "xmlParserHandleReference() deprecated function reached\n");
3292 deprecated = 1;
3293 }
3294
3295#if 0
3296 xmlParserInputPtr input;
3297 xmlChar *name;
3298 xmlEntityPtr ent = NULL;
3299
3300 if (ctxt->token != 0) {
3301 return;
3302 }
3303 if (RAW != '&') return;
3304 GROW;
3305 if ((RAW == '&') && (NXT(1) == '#')) {
3306 switch(ctxt->instate) {
3307 case XML_PARSER_ENTITY_DECL:
3308 case XML_PARSER_PI:
3309 case XML_PARSER_CDATA_SECTION:
3310 case XML_PARSER_COMMENT:
3311 case XML_PARSER_SYSTEM_LITERAL:
3312 /* we just ignore it there */
3313 return;
3314 case XML_PARSER_START_TAG:
3315 return;
3316 case XML_PARSER_END_TAG:
3317 return;
3318 case XML_PARSER_EOF:
3319 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3321 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3322 ctxt->wellFormed = 0;
3323 ctxt->disableSAX = 1;
3324 return;
3325 case XML_PARSER_PROLOG:
3326 case XML_PARSER_START:
3327 case XML_PARSER_MISC:
3328 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3330 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3331 ctxt->wellFormed = 0;
3332 ctxt->disableSAX = 1;
3333 return;
3334 case XML_PARSER_EPILOG:
3335 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3338 ctxt->wellFormed = 0;
3339 ctxt->disableSAX = 1;
3340 return;
3341 case XML_PARSER_DTD:
3342 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData,
3345 "CharRef are forbiden in DTDs!\n");
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 return;
3349 case XML_PARSER_ENTITY_VALUE:
3350 /*
3351 * NOTE: in the case of entity values, we don't do the
3352 * substitution here since we need the literal
3353 * entity value to be able to save the internal
3354 * subset of the document.
3355 * This will be handled by xmlStringDecodeEntities
3356 */
3357 return;
3358 case XML_PARSER_CONTENT:
3359 return;
3360 case XML_PARSER_ATTRIBUTE_VALUE:
3361 /* ctxt->token = xmlParseCharRef(ctxt); */
3362 return;
3363 case XML_PARSER_IGNORE:
3364 return;
3365 }
3366 return;
3367 }
3368
3369 switch(ctxt->instate) {
3370 case XML_PARSER_CDATA_SECTION:
3371 return;
3372 case XML_PARSER_PI:
3373 case XML_PARSER_COMMENT:
3374 case XML_PARSER_SYSTEM_LITERAL:
3375 case XML_PARSER_CONTENT:
3376 return;
3377 case XML_PARSER_START_TAG:
3378 return;
3379 case XML_PARSER_END_TAG:
3380 return;
3381 case XML_PARSER_EOF:
3382 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 return;
3388 case XML_PARSER_PROLOG:
3389 case XML_PARSER_START:
3390 case XML_PARSER_MISC:
3391 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3393 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3394 ctxt->wellFormed = 0;
3395 ctxt->disableSAX = 1;
3396 return;
3397 case XML_PARSER_EPILOG:
3398 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3400 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3401 ctxt->wellFormed = 0;
3402 ctxt->disableSAX = 1;
3403 return;
3404 case XML_PARSER_ENTITY_VALUE:
3405 /*
3406 * NOTE: in the case of entity values, we don't do the
3407 * substitution here since we need the literal
3408 * entity value to be able to save the internal
3409 * subset of the document.
3410 * This will be handled by xmlStringDecodeEntities
3411 */
3412 return;
3413 case XML_PARSER_ATTRIBUTE_VALUE:
3414 /*
3415 * NOTE: in the case of attributes values, we don't do the
3416 * substitution here unless we are in a mode where
3417 * the parser is explicitely asked to substitute
3418 * entities. The SAX callback is called with values
3419 * without entity substitution.
3420 * This will then be handled by xmlStringDecodeEntities
3421 */
3422 return;
3423 case XML_PARSER_ENTITY_DECL:
3424 /*
3425 * we just ignore it there
3426 * the substitution will be done once the entity is referenced
3427 */
3428 return;
3429 case XML_PARSER_DTD:
3430 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3432 ctxt->sax->error(ctxt->userData,
3433 "Entity references are forbiden in DTDs!\n");
3434 ctxt->wellFormed = 0;
3435 ctxt->disableSAX = 1;
3436 return;
3437 case XML_PARSER_IGNORE:
3438 return;
3439 }
3440
3441/* TODO: this seems not reached anymore .... Verify ... */
3442xmlGenericError(xmlGenericErrorContext,
3443 "Reached deprecated section in xmlParserHandleReference()\n");
3444xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003445 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003446xmlGenericError(xmlGenericErrorContext,
3447 "indicating the version: %s, thanks !\n", xmlParserVersion);
3448 NEXT;
3449 name = xmlScanName(ctxt);
3450 if (name == NULL) {
3451 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3454 ctxt->wellFormed = 0;
3455 ctxt->disableSAX = 1;
3456 ctxt->token = '&';
3457 return;
3458 }
3459 if (NXT(xmlStrlen(name)) != ';') {
3460 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData,
3463 "Entity reference: ';' expected\n");
3464 ctxt->wellFormed = 0;
3465 ctxt->disableSAX = 1;
3466 ctxt->token = '&';
3467 xmlFree(name);
3468 return;
3469 }
3470 SKIP(xmlStrlen(name) + 1);
3471 if (ctxt->sax != NULL) {
3472 if (ctxt->sax->getEntity != NULL)
3473 ent = ctxt->sax->getEntity(ctxt->userData, name);
3474 }
3475
3476 /*
3477 * [ WFC: Entity Declared ]
3478 * the Name given in the entity reference must match that in an entity
3479 * declaration, except that well-formed documents need not declare any
3480 * of the following entities: amp, lt, gt, apos, quot.
3481 */
3482 if (ent == NULL)
3483 ent = xmlGetPredefinedEntity(name);
3484 if (ent == NULL) {
3485 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3487 ctxt->sax->error(ctxt->userData,
3488 "Entity reference: entity %s not declared\n",
3489 name);
3490 ctxt->wellFormed = 0;
3491 ctxt->disableSAX = 1;
3492 xmlFree(name);
3493 return;
3494 }
3495
3496 /*
3497 * [ WFC: Parsed Entity ]
3498 * An entity reference must not contain the name of an unparsed entity
3499 */
3500 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3501 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3503 ctxt->sax->error(ctxt->userData,
3504 "Entity reference to unparsed entity %s\n", name);
3505 ctxt->wellFormed = 0;
3506 ctxt->disableSAX = 1;
3507 }
3508
3509 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3510 ctxt->token = ent->content[0];
3511 xmlFree(name);
3512 return;
3513 }
3514 input = xmlNewEntityInputStream(ctxt, ent);
3515 xmlPushInput(ctxt, input);
3516 xmlFree(name);
3517#endif
3518 return;
3519}
3520
3521/**
3522 * xmlHandleEntity:
3523 * @ctxt: an XML parser context
3524 * @entity: an XML entity pointer.
3525 *
3526 * Default handling of defined entities, when should we define a new input
3527 * stream ? When do we just handle that as a set of chars ?
3528 *
3529 * OBSOLETE: to be removed at some point.
3530 */
3531
3532void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003533xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003534 static int deprecated = 0;
3535 if (!deprecated) {
3536 xmlGenericError(xmlGenericErrorContext,
3537 "xmlHandleEntity() deprecated function reached\n");
3538 deprecated = 1;
3539 }
3540
3541#if 0
3542 int len;
3543 xmlParserInputPtr input;
3544
3545 if (entity->content == NULL) {
3546 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3548 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3549 entity->name);
3550 ctxt->wellFormed = 0;
3551 ctxt->disableSAX = 1;
3552 return;
3553 }
3554 len = xmlStrlen(entity->content);
3555 if (len <= 2) goto handle_as_char;
3556
3557 /*
3558 * Redefine its content as an input stream.
3559 */
3560 input = xmlNewEntityInputStream(ctxt, entity);
3561 xmlPushInput(ctxt, input);
3562 return;
3563
3564handle_as_char:
3565 /*
3566 * Just handle the content as a set of chars.
3567 */
3568 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3569 (ctxt->sax->characters != NULL))
3570 ctxt->sax->characters(ctxt->userData, entity->content, len);
3571#endif
3572}
3573
3574/**
3575 * xmlNewGlobalNs:
3576 * @doc: the document carrying the namespace
3577 * @href: the URI associated
3578 * @prefix: the prefix for the namespace
3579 *
3580 * Creation of a Namespace, the old way using PI and without scoping
3581 * DEPRECATED !!!
3582 * It now create a namespace on the root element of the document if found.
3583 * Returns NULL this functionnality had been removed
3584 */
3585xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003586xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3587 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003588 static int deprecated = 0;
3589 if (!deprecated) {
3590 xmlGenericError(xmlGenericErrorContext,
3591 "xmlNewGlobalNs() deprecated function reached\n");
3592 deprecated = 1;
3593 }
3594 return(NULL);
3595#if 0
3596 xmlNodePtr root;
3597
3598 xmlNsPtr cur;
3599
3600 root = xmlDocGetRootElement(doc);
3601 if (root != NULL)
3602 return(xmlNewNs(root, href, prefix));
3603
3604 /*
3605 * if there is no root element yet, create an old Namespace type
3606 * and it will be moved to the root at save time.
3607 */
3608 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3609 if (cur == NULL) {
3610 xmlGenericError(xmlGenericErrorContext,
3611 "xmlNewGlobalNs : malloc failed\n");
3612 return(NULL);
3613 }
3614 memset(cur, 0, sizeof(xmlNs));
3615 cur->type = XML_GLOBAL_NAMESPACE;
3616
3617 if (href != NULL)
3618 cur->href = xmlStrdup(href);
3619 if (prefix != NULL)
3620 cur->prefix = xmlStrdup(prefix);
3621
3622 /*
3623 * Add it at the end to preserve parsing order ...
3624 */
3625 if (doc != NULL) {
3626 if (doc->oldNs == NULL) {
3627 doc->oldNs = cur;
3628 } else {
3629 xmlNsPtr prev = doc->oldNs;
3630
3631 while (prev->next != NULL) prev = prev->next;
3632 prev->next = cur;
3633 }
3634 }
3635
3636 return(NULL);
3637#endif
3638}
3639
3640/**
3641 * xmlUpgradeOldNs:
3642 * @doc: a document pointer
3643 *
3644 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3645 * DEPRECATED
3646 */
3647void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003648xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003649 static int deprecated = 0;
3650 if (!deprecated) {
3651 xmlGenericError(xmlGenericErrorContext,
3652 "xmlNewGlobalNs() deprecated function reached\n");
3653 deprecated = 1;
3654 }
3655#if 0
3656 xmlNsPtr cur;
3657
3658 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3659 if (doc->children == NULL) {
3660#ifdef DEBUG_TREE
3661 xmlGenericError(xmlGenericErrorContext,
3662 "xmlUpgradeOldNs: failed no root !\n");
3663#endif
3664 return;
3665 }
3666
3667 cur = doc->oldNs;
3668 while (cur->next != NULL) {
3669 cur->type = XML_LOCAL_NAMESPACE;
3670 cur = cur->next;
3671 }
3672 cur->type = XML_LOCAL_NAMESPACE;
3673 cur->next = doc->children->nsDef;
3674 doc->children->nsDef = doc->oldNs;
3675 doc->oldNs = NULL;
3676#endif
3677}
3678