blob: 68553c259a508916908bc3b7c68c5f4eb3e310ea [file] [log] [blame]
/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Owen Taylor3473f882001-02-23 17:55:21 +000053
/* Prototype only; the definition is not part of this section of the file. */
void xmlUpgradeOldNs(xmlDocPtr doc);

/*
 * Various global defaults for parsing
 */
#ifdef VMS
/*
 * On VMS the variables are defined under the shortened "...Val" names and
 * the macros below map the regular "...Value" spelling onto them.
 * NOTE(review): presumably a workaround for a symbol length limit on VMS;
 * confirm before touching.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065
Daniel Veillard5e2dace2001-07-18 19:30:27 +000066/**
Owen Taylor3473f882001-02-23 17:55:21 +000067 * xmlCheckVersion:
68 * @version: the include version number
69 *
70 * check the compiled lib version against the include one.
71 * This can warn or immediately kill the application
72 */
73void
74xmlCheckVersion(int version) {
75 int myversion = (int) LIBXML_VERSION;
76
Daniel Veillard6f350292001-10-14 09:56:15 +000077 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000078
Owen Taylor3473f882001-02-23 17:55:21 +000079 if ((myversion / 10000) != (version / 10000)) {
80 xmlGenericError(xmlGenericErrorContext,
81 "Fatal: program compiled against libxml %d using libxml %d\n",
82 (version / 10000), (myversion / 10000));
83 exit(1);
84 }
85 if ((myversion / 100) < (version / 100)) {
86 xmlGenericError(xmlGenericErrorContext,
87 "Warning: program compiled against libxml %d using older %d\n",
88 (version / 100), (myversion / 100));
89 }
90}
91
92
/*
 * Names of the features understood by the feature API, in the order in
 * which xmlGetFeaturesList() hands them out.
 */
static const char * const xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned.
 *
 * Returns -1 if *@len is negative or not below 1000, otherwise the total
 *         number of features; *@len is updated with the number of strings
 *         copied. The strings must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int idx;

    /* without both output arguments the caller only gets the count */
    if ((len == NULL) || (result == NULL))
        return(total);

    if ((*len < 0) || (*len >= 1000))
        return(-1);

    /* never hand out more names than exist */
    if (*len > total)
        *len = total;

    for (idx = 0; idx < *len; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
164
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000165/**
Owen Taylor3473f882001-02-23 17:55:21 +0000166 * xmlGetFeature:
167 * @ctxt: an XML/HTML parser context
168 * @name: the feature name
169 * @result: location to store the result
170 *
171 * Read the current value of one feature of this parser instance
172 *
173 * Returns -1 in case or error, 0 otherwise
174 */
175int
176xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
177 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
178 return(-1);
179
180 if (!strcmp(name, "validate")) {
181 *((int *) result) = ctxt->validate;
182 } else if (!strcmp(name, "keep blanks")) {
183 *((int *) result) = ctxt->keepBlanks;
184 } else if (!strcmp(name, "disable SAX")) {
185 *((int *) result) = ctxt->disableSAX;
186 } else if (!strcmp(name, "fetch external entities")) {
187 *((int *) result) = ctxt->loadsubset;
188 } else if (!strcmp(name, "substitute entities")) {
189 *((int *) result) = ctxt->replaceEntities;
190 } else if (!strcmp(name, "gather line info")) {
191 *((int *) result) = ctxt->record_info;
192 } else if (!strcmp(name, "user data")) {
193 *((void **)result) = ctxt->userData;
194 } else if (!strcmp(name, "is html")) {
195 *((int *) result) = ctxt->html;
196 } else if (!strcmp(name, "is standalone")) {
197 *((int *) result) = ctxt->standalone;
198 } else if (!strcmp(name, "document")) {
199 *((xmlDocPtr *) result) = ctxt->myDoc;
200 } else if (!strcmp(name, "is well formed")) {
201 *((int *) result) = ctxt->wellFormed;
202 } else if (!strcmp(name, "is valid")) {
203 *((int *) result) = ctxt->valid;
204 } else if (!strcmp(name, "SAX block")) {
205 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
206 } else if (!strcmp(name, "SAX function internalSubset")) {
207 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
208 } else if (!strcmp(name, "SAX function isStandalone")) {
209 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
210 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
211 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
212 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
213 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
214 } else if (!strcmp(name, "SAX function resolveEntity")) {
215 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
216 } else if (!strcmp(name, "SAX function getEntity")) {
217 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
218 } else if (!strcmp(name, "SAX function entityDecl")) {
219 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
220 } else if (!strcmp(name, "SAX function notationDecl")) {
221 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
222 } else if (!strcmp(name, "SAX function attributeDecl")) {
223 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
224 } else if (!strcmp(name, "SAX function elementDecl")) {
225 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
226 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
227 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
228 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
229 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
230 } else if (!strcmp(name, "SAX function startDocument")) {
231 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
232 } else if (!strcmp(name, "SAX function endDocument")) {
233 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
234 } else if (!strcmp(name, "SAX function startElement")) {
235 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
236 } else if (!strcmp(name, "SAX function endElement")) {
237 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
238 } else if (!strcmp(name, "SAX function reference")) {
239 *((referenceSAXFunc *) result) = ctxt->sax->reference;
240 } else if (!strcmp(name, "SAX function characters")) {
241 *((charactersSAXFunc *) result) = ctxt->sax->characters;
242 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
243 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
244 } else if (!strcmp(name, "SAX function processingInstruction")) {
245 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
246 } else if (!strcmp(name, "SAX function comment")) {
247 *((commentSAXFunc *) result) = ctxt->sax->comment;
248 } else if (!strcmp(name, "SAX function warning")) {
249 *((warningSAXFunc *) result) = ctxt->sax->warning;
250 } else if (!strcmp(name, "SAX function error")) {
251 *((errorSAXFunc *) result) = ctxt->sax->error;
252 } else if (!strcmp(name, "SAX function fatalError")) {
253 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
254 } else if (!strcmp(name, "SAX function getParameterEntity")) {
255 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
256 } else if (!strcmp(name, "SAX function cdataBlock")) {
257 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
258 } else if (!strcmp(name, "SAX function externalSubset")) {
259 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
260 } else {
261 return(-1);
262 }
263 return(0);
264}
265
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000266/**
Owen Taylor3473f882001-02-23 17:55:21 +0000267 * xmlSetFeature:
268 * @ctxt: an XML/HTML parser context
269 * @name: the feature name
270 * @value: pointer to the location of the new value
271 *
272 * Change the current value of one feature of this parser instance
273 *
274 * Returns -1 in case or error, 0 otherwise
275 */
276int
277xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
278 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
279 return(-1);
280
281 if (!strcmp(name, "validate")) {
282 int newvalidate = *((int *) value);
283 if ((!ctxt->validate) && (newvalidate != 0)) {
284 if (ctxt->vctxt.warning == NULL)
285 ctxt->vctxt.warning = xmlParserValidityWarning;
286 if (ctxt->vctxt.error == NULL)
287 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000288 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000289 }
290 ctxt->validate = newvalidate;
291 } else if (!strcmp(name, "keep blanks")) {
292 ctxt->keepBlanks = *((int *) value);
293 } else if (!strcmp(name, "disable SAX")) {
294 ctxt->disableSAX = *((int *) value);
295 } else if (!strcmp(name, "fetch external entities")) {
296 ctxt->loadsubset = *((int *) value);
297 } else if (!strcmp(name, "substitute entities")) {
298 ctxt->replaceEntities = *((int *) value);
299 } else if (!strcmp(name, "gather line info")) {
300 ctxt->record_info = *((int *) value);
301 } else if (!strcmp(name, "user data")) {
302 ctxt->userData = *((void **)value);
303 } else if (!strcmp(name, "is html")) {
304 ctxt->html = *((int *) value);
305 } else if (!strcmp(name, "is standalone")) {
306 ctxt->standalone = *((int *) value);
307 } else if (!strcmp(name, "document")) {
308 ctxt->myDoc = *((xmlDocPtr *) value);
309 } else if (!strcmp(name, "is well formed")) {
310 ctxt->wellFormed = *((int *) value);
311 } else if (!strcmp(name, "is valid")) {
312 ctxt->valid = *((int *) value);
313 } else if (!strcmp(name, "SAX block")) {
314 ctxt->sax = *((xmlSAXHandlerPtr *) value);
315 } else if (!strcmp(name, "SAX function internalSubset")) {
316 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function isStandalone")) {
318 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
320 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
322 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function resolveEntity")) {
324 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function getEntity")) {
326 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
327 } else if (!strcmp(name, "SAX function entityDecl")) {
328 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function notationDecl")) {
330 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function attributeDecl")) {
332 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function elementDecl")) {
334 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
336 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
338 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function startDocument")) {
340 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function endDocument")) {
342 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function startElement")) {
344 ctxt->sax->startElement = *((startElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function endElement")) {
346 ctxt->sax->endElement = *((endElementSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function reference")) {
348 ctxt->sax->reference = *((referenceSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function characters")) {
350 ctxt->sax->characters = *((charactersSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
352 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function processingInstruction")) {
354 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function comment")) {
356 ctxt->sax->comment = *((commentSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function warning")) {
358 ctxt->sax->warning = *((warningSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function error")) {
360 ctxt->sax->error = *((errorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function fatalError")) {
362 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function getParameterEntity")) {
364 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
365 } else if (!strcmp(name, "SAX function cdataBlock")) {
366 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function externalSubset")) {
368 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
369 } else {
370 return(-1);
371 }
372 return(0);
373}
374
/************************************************************************
 *									*
 * 		Some functions to avoid too large macros		*
 *									*
 ************************************************************************/
380
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF are excluded, then the supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
401
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
416
/*
 * Direct lookup table for the BaseChar production restricted to the
 * code points 0x0000 - 0x00FF.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [low, high] ranges of the BaseChar production for code points
 * at or above 0x0100. The entries are sorted in ascending order and do not
 * overlap, which is what the binary search in xmlIsBaseChar() relies on.
 */
static const struct xmlBaseCharRange {
    int low;
    int high;
} xmlBaseCharRanges[] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, everything
 * else by a binary search in xmlBaseCharRanges. Negative values are
 * rejected up front so they can never be used as an array index.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int lo, hi;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    lo = 0;
    hi = (int) (sizeof(xmlBaseCharRanges) / sizeof(xmlBaseCharRanges[0])) - 1;
    while (lo <= hi) {
        int mid = lo + (hi - lo) / 2;

        if (c < xmlBaseCharRanges[mid].low)
            hi = mid - 1;
        else if (c > xmlBaseCharRanges[mid].high)
            lo = mid + 1;
        else
            return(1);
    }
    return(0);
}
655
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] digit blocks, in ascending order */
    static const int digitBlocks[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int nbBlocks = (int) (sizeof(digitBlocks) / sizeof(digitBlocks[0]));
    int i;

    for (i = 0; i < nbBlocks; i++) {
        /* the table is sorted: nothing further can match */
        if (c < digitBlocks[i][0])
            break;
        if (c <= digitBlocks[i][1])
            return(1);
    }
    return(0);
}
685
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive [first, last] ranges, in ascending order */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C},
        {0x093E, 0x094C}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4},
        {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
        {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C},
        {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5},
        {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
        {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
        {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03},
        {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},
        {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
        {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
        {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97},
        {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F},
        {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    const int nbRanges =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    int i;

    for (i = 0; i < nbRanges; i++) {
        /* the table is sorted: nothing further can match */
        if (c < combiningRanges[i][0])
            break;
        if (c <= combiningRanges[i][1])
            return(1);
    }
    return(0);
}
798
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE]; 0x30FD belongs to the range as well */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
823
/**
 * xmlIsIdeographic:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the ranges of the production are accepted; in particular the
 * CJK compatibility area starting at #xF900 is not part of it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick rejection: nothing below 0x0100 is ideographic */
    if (c < 0x0100)
        return(0);
    return((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
           (((c) >= 0x3021) && ((c) <= 0x3029)) ||
           ((c) == 0x3007));
}
841
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
855
/**
 * xmlIsPubidChar:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
    /* the three allowed white space characters */
    case 0x20: case 0x0D: case 0x0A:
    /* [-'()+,./:=?;!*#@$_%] */
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
878
879/************************************************************************
880 * *
881 * Input handling functions for progressive parsing *
882 * *
883 ************************************************************************/
884
885/* #define DEBUG_INPUT */
886/* #define DEBUG_STACK */
887/* #define DEBUG_PUSH */
888
889
890/* we need to keep enough input to show errors in context */
891#define LINE_LEN 80
892
893#ifdef DEBUG_INPUT
894#define CHECK_BUFFER(in) check_buffer(in)
895
896void check_buffer(xmlParserInputPtr in) {
897 if (in->base != in->buf->buffer->content) {
898 xmlGenericError(xmlGenericErrorContext,
899 "xmlParserInput: base mismatch problem\n");
900 }
901 if (in->cur < in->base) {
902 xmlGenericError(xmlGenericErrorContext,
903 "xmlParserInput: cur < base problem\n");
904 }
905 if (in->cur > in->base + in->buf->buffer->use) {
906 xmlGenericError(xmlGenericErrorContext,
907 "xmlParserInput: cur > base + use problem\n");
908 }
909 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
910 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
911 in->buf->buffer->use, in->buf->buffer->size);
912}
913
914#else
915#define CHECK_BUFFER(in)
916#endif
917
918
919/**
920 * xmlParserInputRead:
921 * @in: an XML parser input
922 * @len: an indicative size for the lookahead
923 *
924 * This function refresh the input for the parser. It doesn't try to
925 * preserve pointers to the input buffer, and discard already read data
926 *
927 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
928 * end of this entity
929 */
930int
931xmlParserInputRead(xmlParserInputPtr in, int len) {
932 int ret;
933 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000934 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000935
936#ifdef DEBUG_INPUT
937 xmlGenericError(xmlGenericErrorContext, "Read\n");
938#endif
939 if (in->buf == NULL) return(-1);
940 if (in->base == NULL) return(-1);
941 if (in->cur == NULL) return(-1);
942 if (in->buf->buffer == NULL) return(-1);
943 if (in->buf->readcallback == NULL) return(-1);
944
945 CHECK_BUFFER(in);
946
947 used = in->cur - in->buf->buffer->content;
948 ret = xmlBufferShrink(in->buf->buffer, used);
949 if (ret > 0) {
950 in->cur -= ret;
951 in->consumed += ret;
952 }
953 ret = xmlParserInputBufferRead(in->buf, len);
954 if (in->base != in->buf->buffer->content) {
955 /*
956 * the buffer has been realloced
957 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000958 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000959 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000960 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000961 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000962 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000963
964 CHECK_BUFFER(in);
965
966 return(ret);
967}
968
969/**
970 * xmlParserInputGrow:
971 * @in: an XML parser input
972 * @len: an indicative size for the lookahead
973 *
974 * This function increase the input for the parser. It tries to
975 * preserve pointers to the input buffer, and keep already read data
976 *
977 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
978 * end of this entity
979 */
980int
981xmlParserInputGrow(xmlParserInputPtr in, int len) {
982 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000983 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000984
985#ifdef DEBUG_INPUT
986 xmlGenericError(xmlGenericErrorContext, "Grow\n");
987#endif
988 if (in->buf == NULL) return(-1);
989 if (in->base == NULL) return(-1);
990 if (in->cur == NULL) return(-1);
991 if (in->buf->buffer == NULL) return(-1);
992
993 CHECK_BUFFER(in);
994
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000995 indx = in->cur - in->base;
996 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +0000997
998 CHECK_BUFFER(in);
999
1000 return(0);
1001 }
1002 if (in->buf->readcallback != NULL)
1003 ret = xmlParserInputBufferGrow(in->buf, len);
1004 else
1005 return(0);
1006
1007 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001008 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001009 * block, but we use it really as an integer to do some
1010 * pointer arithmetic. Insure will raise it as a bug but in
1011 * that specific case, that's not !
1012 */
1013 if (in->base != in->buf->buffer->content) {
1014 /*
1015 * the buffer has been realloced
1016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001017 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001018 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001019 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001020 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001021 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001022
1023 CHECK_BUFFER(in);
1024
1025 return(ret);
1026}
1027
1028/**
1029 * xmlParserInputShrink:
1030 * @in: an XML parser input
1031 *
1032 * This function removes used input for the parser.
1033 */
1034void
1035xmlParserInputShrink(xmlParserInputPtr in) {
1036 int used;
1037 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001038 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001039
1040#ifdef DEBUG_INPUT
1041 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1042#endif
1043 if (in->buf == NULL) return;
1044 if (in->base == NULL) return;
1045 if (in->cur == NULL) return;
1046 if (in->buf->buffer == NULL) return;
1047
1048 CHECK_BUFFER(in);
1049
1050 used = in->cur - in->buf->buffer->content;
1051 /*
1052 * Do not shrink on large buffers whose only a tiny fraction
1053 * was consumned
1054 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001055 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001056 return;
1057 if (used > INPUT_CHUNK) {
1058 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1059 if (ret > 0) {
1060 in->cur -= ret;
1061 in->consumed += ret;
1062 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001063 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001064 }
1065
1066 CHECK_BUFFER(in);
1067
1068 if (in->buf->buffer->use > INPUT_CHUNK) {
1069 return;
1070 }
1071 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1072 if (in->base != in->buf->buffer->content) {
1073 /*
1074 * the buffer has been realloced
1075 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001076 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001077 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001078 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001079 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001080 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001081
1082 CHECK_BUFFER(in);
1083}
1084
1085/************************************************************************
1086 * *
1087 * UTF8 character input and related functions *
1088 * *
1089 ************************************************************************/
1090
1091/**
1092 * xmlNextChar:
1093 * @ctxt: the XML parser context
1094 *
1095 * Skip to the next char input char.
1096 */
1097
1098void
1099xmlNextChar(xmlParserCtxtPtr ctxt) {
1100 if (ctxt->instate == XML_PARSER_EOF)
1101 return;
1102
1103 /*
1104 * 2.11 End-of-Line Handling
1105 * the literal two-character sequence "#xD#xA" or a standalone
1106 * literal #xD, an XML processor must pass to the application
1107 * the single character #xA.
1108 */
1109 if (ctxt->token != 0) ctxt->token = 0;
1110 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1111 if ((*ctxt->input->cur == 0) &&
1112 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1113 (ctxt->instate != XML_PARSER_COMMENT)) {
1114 /*
1115 * If we are at the end of the current entity and
1116 * the context allows it, we pop consumed entities
1117 * automatically.
1118 * the auto closing should be blocked in other cases
1119 */
1120 xmlPopInput(ctxt);
1121 } else {
1122 if (*(ctxt->input->cur) == '\n') {
1123 ctxt->input->line++; ctxt->input->col = 1;
1124 } else ctxt->input->col++;
1125 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1126 /*
1127 * We are supposed to handle UTF8, check it's valid
1128 * From rfc2044: encoding of the Unicode values on UTF-8:
1129 *
1130 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1131 * 0000 0000-0000 007F 0xxxxxxx
1132 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1133 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1134 *
1135 * Check for the 0x110000 limit too
1136 */
1137 const unsigned char *cur = ctxt->input->cur;
1138 unsigned char c;
1139
1140 c = *cur;
1141 if (c & 0x80) {
1142 if (cur[1] == 0)
1143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1144 if ((cur[1] & 0xc0) != 0x80)
1145 goto encoding_error;
1146 if ((c & 0xe0) == 0xe0) {
1147 unsigned int val;
1148
1149 if (cur[2] == 0)
1150 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1151 if ((cur[2] & 0xc0) != 0x80)
1152 goto encoding_error;
1153 if ((c & 0xf0) == 0xf0) {
1154 if (cur[3] == 0)
1155 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1156 if (((c & 0xf8) != 0xf0) ||
1157 ((cur[3] & 0xc0) != 0x80))
1158 goto encoding_error;
1159 /* 4-byte code */
1160 ctxt->input->cur += 4;
1161 val = (cur[0] & 0x7) << 18;
1162 val |= (cur[1] & 0x3f) << 12;
1163 val |= (cur[2] & 0x3f) << 6;
1164 val |= cur[3] & 0x3f;
1165 } else {
1166 /* 3-byte code */
1167 ctxt->input->cur += 3;
1168 val = (cur[0] & 0xf) << 12;
1169 val |= (cur[1] & 0x3f) << 6;
1170 val |= cur[2] & 0x3f;
1171 }
1172 if (((val > 0xd7ff) && (val < 0xe000)) ||
1173 ((val > 0xfffd) && (val < 0x10000)) ||
1174 (val >= 0x110000)) {
1175 if ((ctxt->sax != NULL) &&
1176 (ctxt->sax->error != NULL))
1177 ctxt->sax->error(ctxt->userData,
1178 "Char 0x%X out of allowed range\n", val);
1179 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1180 ctxt->wellFormed = 0;
1181 ctxt->disableSAX = 1;
1182 }
1183 } else
1184 /* 2-byte code */
1185 ctxt->input->cur += 2;
1186 } else
1187 /* 1-byte code */
1188 ctxt->input->cur++;
1189 } else {
1190 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001191 * Assume it's a fixed length encoding (1) with
Owen Taylor3473f882001-02-23 17:55:21 +00001192 * a compatibke encoding for the ASCII set, since
1193 * XML constructs only use < 128 chars
1194 */
1195 ctxt->input->cur++;
1196 }
1197 ctxt->nbChars++;
1198 if (*ctxt->input->cur == 0)
1199 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1200 }
1201 } else {
1202 ctxt->input->cur++;
1203 ctxt->nbChars++;
1204 if (*ctxt->input->cur == 0)
1205 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1206 }
1207 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1208 xmlParserHandlePEReference(ctxt);
1209 if ((*ctxt->input->cur == 0) &&
1210 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1211 xmlPopInput(ctxt);
1212 return;
1213encoding_error:
1214 /*
1215 * If we detect an UTF8 error that probably mean that the
1216 * input encoding didn't get properly advertized in the
1217 * declaration header. Report the error and switch the encoding
1218 * to ISO-Latin-1 (if you don't like this policy, just declare the
1219 * encoding !)
1220 */
1221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1222 ctxt->sax->error(ctxt->userData,
1223 "Input is not proper UTF-8, indicate encoding !\n");
1224 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1225 ctxt->input->cur[0], ctxt->input->cur[1],
1226 ctxt->input->cur[2], ctxt->input->cur[3]);
1227 }
1228 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1229
1230 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1231 ctxt->input->cur++;
1232 return;
1233}
1234
1235/**
1236 * xmlCurrentChar:
1237 * @ctxt: the XML parser context
1238 * @len: pointer to the length of the char read
1239 *
1240 * The current char value, if using UTF-8 this may actaully span multiple
1241 * bytes in the input buffer. Implement the end of line normalization:
1242 * 2.11 End-of-Line Handling
1243 * Wherever an external parsed entity or the literal entity value
1244 * of an internal parsed entity contains either the literal two-character
1245 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1246 * must pass to the application the single character #xA.
1247 * This behavior can conveniently be produced by normalizing all
1248 * line breaks to #xA on input, before parsing.)
1249 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001250 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001251 */
1252
1253int
1254xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1255 if (ctxt->instate == XML_PARSER_EOF)
1256 return(0);
1257
1258 if (ctxt->token != 0) {
1259 *len = 0;
1260 return(ctxt->token);
1261 }
1262 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1263 *len = 1;
1264 return((int) *ctxt->input->cur);
1265 }
1266 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1267 /*
1268 * We are supposed to handle UTF8, check it's valid
1269 * From rfc2044: encoding of the Unicode values on UTF-8:
1270 *
1271 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1272 * 0000 0000-0000 007F 0xxxxxxx
1273 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1274 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1275 *
1276 * Check for the 0x110000 limit too
1277 */
1278 const unsigned char *cur = ctxt->input->cur;
1279 unsigned char c;
1280 unsigned int val;
1281
1282 c = *cur;
1283 if (c & 0x80) {
1284 if (cur[1] == 0)
1285 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1286 if ((cur[1] & 0xc0) != 0x80)
1287 goto encoding_error;
1288 if ((c & 0xe0) == 0xe0) {
1289
1290 if (cur[2] == 0)
1291 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1292 if ((cur[2] & 0xc0) != 0x80)
1293 goto encoding_error;
1294 if ((c & 0xf0) == 0xf0) {
1295 if (cur[3] == 0)
1296 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1297 if (((c & 0xf8) != 0xf0) ||
1298 ((cur[3] & 0xc0) != 0x80))
1299 goto encoding_error;
1300 /* 4-byte code */
1301 *len = 4;
1302 val = (cur[0] & 0x7) << 18;
1303 val |= (cur[1] & 0x3f) << 12;
1304 val |= (cur[2] & 0x3f) << 6;
1305 val |= cur[3] & 0x3f;
1306 } else {
1307 /* 3-byte code */
1308 *len = 3;
1309 val = (cur[0] & 0xf) << 12;
1310 val |= (cur[1] & 0x3f) << 6;
1311 val |= cur[2] & 0x3f;
1312 }
1313 } else {
1314 /* 2-byte code */
1315 *len = 2;
1316 val = (cur[0] & 0x1f) << 6;
1317 val |= cur[1] & 0x3f;
1318 }
1319 if (!IS_CHAR(val)) {
1320 if ((ctxt->sax != NULL) &&
1321 (ctxt->sax->error != NULL))
1322 ctxt->sax->error(ctxt->userData,
1323 "Char 0x%X out of allowed range\n", val);
1324 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1325 ctxt->wellFormed = 0;
1326 ctxt->disableSAX = 1;
1327 }
1328 return(val);
1329 } else {
1330 /* 1-byte code */
1331 *len = 1;
1332 if (*ctxt->input->cur == 0xD) {
1333 if (ctxt->input->cur[1] == 0xA) {
1334 ctxt->nbChars++;
1335 ctxt->input->cur++;
1336 }
1337 return(0xA);
1338 }
1339 return((int) *ctxt->input->cur);
1340 }
1341 }
1342 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001343 * Assume it's a fixed length encoding (1) with
Owen Taylor3473f882001-02-23 17:55:21 +00001344 * a compatibke encoding for the ASCII set, since
1345 * XML constructs only use < 128 chars
1346 */
1347 *len = 1;
1348 if (*ctxt->input->cur == 0xD) {
1349 if (ctxt->input->cur[1] == 0xA) {
1350 ctxt->nbChars++;
1351 ctxt->input->cur++;
1352 }
1353 return(0xA);
1354 }
1355 return((int) *ctxt->input->cur);
1356encoding_error:
1357 /*
1358 * If we detect an UTF8 error that probably mean that the
1359 * input encoding didn't get properly advertized in the
1360 * declaration header. Report the error and switch the encoding
1361 * to ISO-Latin-1 (if you don't like this policy, just declare the
1362 * encoding !)
1363 */
1364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1365 ctxt->sax->error(ctxt->userData,
1366 "Input is not proper UTF-8, indicate encoding !\n");
1367 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1368 ctxt->input->cur[0], ctxt->input->cur[1],
1369 ctxt->input->cur[2], ctxt->input->cur[3]);
1370 }
1371 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1372
1373 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1374 *len = 1;
1375 return((int) *ctxt->input->cur);
1376}
1377
1378/**
1379 * xmlStringCurrentChar:
1380 * @ctxt: the XML parser context
1381 * @cur: pointer to the beginning of the char
1382 * @len: pointer to the length of the char read
1383 *
1384 * The current char value, if using UTF-8 this may actaully span multiple
1385 * bytes in the input buffer.
1386 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001387 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001388 */
1389
1390int
1391xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001392 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001393 /*
1394 * We are supposed to handle UTF8, check it's valid
1395 * From rfc2044: encoding of the Unicode values on UTF-8:
1396 *
1397 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1398 * 0000 0000-0000 007F 0xxxxxxx
1399 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1400 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1401 *
1402 * Check for the 0x110000 limit too
1403 */
1404 unsigned char c;
1405 unsigned int val;
1406
1407 c = *cur;
1408 if (c & 0x80) {
1409 if ((cur[1] & 0xc0) != 0x80)
1410 goto encoding_error;
1411 if ((c & 0xe0) == 0xe0) {
1412
1413 if ((cur[2] & 0xc0) != 0x80)
1414 goto encoding_error;
1415 if ((c & 0xf0) == 0xf0) {
1416 if (((c & 0xf8) != 0xf0) ||
1417 ((cur[3] & 0xc0) != 0x80))
1418 goto encoding_error;
1419 /* 4-byte code */
1420 *len = 4;
1421 val = (cur[0] & 0x7) << 18;
1422 val |= (cur[1] & 0x3f) << 12;
1423 val |= (cur[2] & 0x3f) << 6;
1424 val |= cur[3] & 0x3f;
1425 } else {
1426 /* 3-byte code */
1427 *len = 3;
1428 val = (cur[0] & 0xf) << 12;
1429 val |= (cur[1] & 0x3f) << 6;
1430 val |= cur[2] & 0x3f;
1431 }
1432 } else {
1433 /* 2-byte code */
1434 *len = 2;
1435 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001436 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001437 }
1438 if (!IS_CHAR(val)) {
1439 if ((ctxt->sax != NULL) &&
1440 (ctxt->sax->error != NULL))
1441 ctxt->sax->error(ctxt->userData,
1442 "Char 0x%X out of allowed range\n", val);
1443 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1444 ctxt->wellFormed = 0;
1445 ctxt->disableSAX = 1;
1446 }
1447 return(val);
1448 } else {
1449 /* 1-byte code */
1450 *len = 1;
1451 return((int) *cur);
1452 }
1453 }
1454 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001455 * Assume it's a fixed length encoding (1) with
Owen Taylor3473f882001-02-23 17:55:21 +00001456 * a compatibke encoding for the ASCII set, since
1457 * XML constructs only use < 128 chars
1458 */
1459 *len = 1;
1460 return((int) *cur);
1461encoding_error:
1462 /*
1463 * If we detect an UTF8 error that probably mean that the
1464 * input encoding didn't get properly advertized in the
1465 * declaration header. Report the error and switch the encoding
1466 * to ISO-Latin-1 (if you don't like this policy, just declare the
1467 * encoding !)
1468 */
1469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1470 ctxt->sax->error(ctxt->userData,
1471 "Input is not proper UTF-8, indicate encoding !\n");
1472 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1473 ctxt->input->cur[0], ctxt->input->cur[1],
1474 ctxt->input->cur[2], ctxt->input->cur[3]);
1475 }
1476 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1477
1478 *len = 1;
1479 return((int) *cur);
1480}
1481
1482/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001483 * xmlCopyCharMultiByte:
1484 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001485 * @val: the char value
1486 *
1487 * append the char value in the array
1488 *
1489 * Returns the number of xmlChar written
1490 */
Owen Taylor3473f882001-02-23 17:55:21 +00001491int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001492xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001493 /*
1494 * We are supposed to handle UTF8, check it's valid
1495 * From rfc2044: encoding of the Unicode values on UTF-8:
1496 *
1497 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1498 * 0000 0000-0000 007F 0xxxxxxx
1499 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1500 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1501 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001502 if (val >= 0x80) {
1503 xmlChar *savedout = out;
1504 int bits;
1505 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1506 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1507 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1508 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001509 xmlGenericError(xmlGenericErrorContext,
1510 "Internal error, xmlCopyChar 0x%X out of bound\n",
1511 val);
1512 return(0);
1513 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001514 for ( ; bits >= 0; bits-= 6)
1515 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1516 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 }
1518 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001519 return 1;
1520}
1521
1522/**
1523 * xmlCopyChar:
1524 * @len: Ignored, compatibility
1525 * @out: pointer to an arry of xmlChar
1526 * @val: the char value
1527 *
1528 * append the char value in the array
1529 *
1530 * Returns the number of xmlChar written
1531 */
1532
1533int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001534xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001535 /* the len parameter is ignored */
1536 if (val >= 0x80) {
1537 return(xmlCopyCharMultiByte (out, val));
1538 }
1539 *out = (xmlChar) val;
1540 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001541}
1542
1543/************************************************************************
1544 * *
1545 * Commodity functions to switch encodings *
1546 * *
1547 ************************************************************************/
1548
1549/**
1550 * xmlSwitchEncoding:
1551 * @ctxt: the parser context
1552 * @enc: the encoding value (number)
1553 *
1554 * change the input functions when discovering the character encoding
1555 * of a given entity.
1556 *
1557 * Returns 0 in case of success, -1 otherwise
1558 */
1559int
1560xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1561{
1562 xmlCharEncodingHandlerPtr handler;
1563
1564 switch (enc) {
1565 case XML_CHAR_ENCODING_ERROR:
1566 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1568 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1569 ctxt->wellFormed = 0;
1570 ctxt->disableSAX = 1;
1571 break;
1572 case XML_CHAR_ENCODING_NONE:
1573 /* let's assume it's UTF-8 without the XML decl */
1574 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1575 return(0);
1576 case XML_CHAR_ENCODING_UTF8:
1577 /* default encoding, no conversion should be needed */
1578 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001579
1580 /*
1581 * Errata on XML-1.0 June 20 2001
1582 * Specific handling of the Byte Order Mark for
1583 * UTF-8
1584 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001585 if ((ctxt->input != NULL) &&
1586 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001587 (ctxt->input->cur[1] == 0xBB) &&
1588 (ctxt->input->cur[2] == 0xBF)) {
1589 ctxt->input->cur += 3;
1590 }
Owen Taylor3473f882001-02-23 17:55:21 +00001591 return(0);
1592 default:
1593 break;
1594 }
1595 handler = xmlGetCharEncodingHandler(enc);
1596 if (handler == NULL) {
1597 /*
1598 * Default handlers.
1599 */
1600 switch (enc) {
1601 case XML_CHAR_ENCODING_ERROR:
1602 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1604 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1605 ctxt->wellFormed = 0;
1606 ctxt->disableSAX = 1;
1607 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1608 break;
1609 case XML_CHAR_ENCODING_NONE:
1610 /* let's assume it's UTF-8 without the XML decl */
1611 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1612 return(0);
1613 case XML_CHAR_ENCODING_UTF8:
1614 case XML_CHAR_ENCODING_ASCII:
1615 /* default encoding, no conversion should be needed */
1616 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1617 return(0);
1618 case XML_CHAR_ENCODING_UTF16LE:
1619 break;
1620 case XML_CHAR_ENCODING_UTF16BE:
1621 break;
1622 case XML_CHAR_ENCODING_UCS4LE:
1623 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1625 ctxt->sax->error(ctxt->userData,
1626 "char encoding USC4 little endian not supported\n");
1627 break;
1628 case XML_CHAR_ENCODING_UCS4BE:
1629 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1631 ctxt->sax->error(ctxt->userData,
1632 "char encoding USC4 big endian not supported\n");
1633 break;
1634 case XML_CHAR_ENCODING_EBCDIC:
1635 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1637 ctxt->sax->error(ctxt->userData,
1638 "char encoding EBCDIC not supported\n");
1639 break;
1640 case XML_CHAR_ENCODING_UCS4_2143:
1641 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643 ctxt->sax->error(ctxt->userData,
1644 "char encoding UCS4 2143 not supported\n");
1645 break;
1646 case XML_CHAR_ENCODING_UCS4_3412:
1647 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649 ctxt->sax->error(ctxt->userData,
1650 "char encoding UCS4 3412 not supported\n");
1651 break;
1652 case XML_CHAR_ENCODING_UCS2:
1653 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "char encoding UCS2 not supported\n");
1657 break;
1658 case XML_CHAR_ENCODING_8859_1:
1659 case XML_CHAR_ENCODING_8859_2:
1660 case XML_CHAR_ENCODING_8859_3:
1661 case XML_CHAR_ENCODING_8859_4:
1662 case XML_CHAR_ENCODING_8859_5:
1663 case XML_CHAR_ENCODING_8859_6:
1664 case XML_CHAR_ENCODING_8859_7:
1665 case XML_CHAR_ENCODING_8859_8:
1666 case XML_CHAR_ENCODING_8859_9:
1667 /*
1668 * We used to keep the internal content in the
1669 * document encoding however this turns being unmaintainable
1670 * So xmlGetCharEncodingHandler() will return non-null
1671 * values for this now.
1672 */
1673 if ((ctxt->inputNr == 1) &&
1674 (ctxt->encoding == NULL) &&
1675 (ctxt->input->encoding != NULL)) {
1676 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1677 }
1678 ctxt->charset = enc;
1679 return(0);
1680 case XML_CHAR_ENCODING_2022_JP:
1681 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1683 ctxt->sax->error(ctxt->userData,
1684 "char encoding ISO-2022-JPnot supported\n");
1685 break;
1686 case XML_CHAR_ENCODING_SHIFT_JIS:
1687 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1689 ctxt->sax->error(ctxt->userData,
1690 "char encoding Shift_JIS not supported\n");
1691 break;
1692 case XML_CHAR_ENCODING_EUC_JP:
1693 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1695 ctxt->sax->error(ctxt->userData,
1696 "char encoding EUC-JPnot supported\n");
1697 break;
1698 }
1699 }
1700 if (handler == NULL)
1701 return(-1);
1702 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1703 return(xmlSwitchToEncoding(ctxt, handler));
1704}
1705
1706/**
1707 * xmlSwitchToEncoding:
1708 * @ctxt: the parser context
1709 * @handler: the encoding handler
1710 *
1711 * change the input functions when discovering the character encoding
1712 * of a given entity.
1713 *
1714 * Returns 0 in case of success, -1 otherwise
1715 */
1716int
1717xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1718{
1719 int nbchars;
1720
1721 if (handler != NULL) {
1722 if (ctxt->input != NULL) {
1723 if (ctxt->input->buf != NULL) {
1724 if (ctxt->input->buf->encoder != NULL) {
1725 if (ctxt->input->buf->encoder == handler)
1726 return(0);
1727 /*
1728 * Note: this is a bit dangerous, but that's what it
1729 * takes to use nearly compatible signature for different
1730 * encodings.
1731 */
1732 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1733 ctxt->input->buf->encoder = handler;
1734 return(0);
1735 }
1736 ctxt->input->buf->encoder = handler;
1737
1738 /*
1739 * Is there already some content down the pipe to convert ?
1740 */
1741 if ((ctxt->input->buf->buffer != NULL) &&
1742 (ctxt->input->buf->buffer->use > 0)) {
1743 int processed;
1744
1745 /*
1746 * Specific handling of the Byte Order Mark for
1747 * UTF-16
1748 */
1749 if ((handler->name != NULL) &&
1750 (!strcmp(handler->name, "UTF-16LE")) &&
1751 (ctxt->input->cur[0] == 0xFF) &&
1752 (ctxt->input->cur[1] == 0xFE)) {
1753 ctxt->input->cur += 2;
1754 }
1755 if ((handler->name != NULL) &&
1756 (!strcmp(handler->name, "UTF-16BE")) &&
1757 (ctxt->input->cur[0] == 0xFE) &&
1758 (ctxt->input->cur[1] == 0xFF)) {
1759 ctxt->input->cur += 2;
1760 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001761 /*
1762 * Errata on XML-1.0 June 20 2001
1763 * Specific handling of the Byte Order Mark for
1764 * UTF-8
1765 */
1766 if ((handler->name != NULL) &&
1767 (!strcmp(handler->name, "UTF-8")) &&
1768 (ctxt->input->cur[0] == 0xEF) &&
1769 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001770 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001771 ctxt->input->cur += 3;
1772 }
Owen Taylor3473f882001-02-23 17:55:21 +00001773
1774 /*
1775 * Shring the current input buffer.
1776 * Move it as the raw buffer and create a new input buffer
1777 */
1778 processed = ctxt->input->cur - ctxt->input->base;
1779 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1780 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1781 ctxt->input->buf->buffer = xmlBufferCreate();
1782
1783 if (ctxt->html) {
1784 /*
1785 * converst as much as possbile of the buffer
1786 */
1787 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1788 ctxt->input->buf->buffer,
1789 ctxt->input->buf->raw);
1790 } else {
1791 /*
1792 * convert just enough to get
1793 * '<?xml version="1.0" encoding="xxx"?>'
1794 * parsed with the autodetected encoding
1795 * into the parser reading buffer.
1796 */
1797 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1798 ctxt->input->buf->buffer,
1799 ctxt->input->buf->raw);
1800 }
1801 if (nbchars < 0) {
1802 xmlGenericError(xmlGenericErrorContext,
1803 "xmlSwitchToEncoding: encoder error\n");
1804 return(-1);
1805 }
1806 ctxt->input->base =
1807 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001808 ctxt->input->end =
1809 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001810
1811 }
1812 return(0);
1813 } else {
1814 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1815 /*
1816 * When parsing a static memory array one must know the
1817 * size to be able to convert the buffer.
1818 */
1819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1820 ctxt->sax->error(ctxt->userData,
1821 "xmlSwitchEncoding : no input\n");
1822 return(-1);
1823 } else {
1824 int processed;
1825
1826 /*
1827 * Shring the current input buffer.
1828 * Move it as the raw buffer and create a new input buffer
1829 */
1830 processed = ctxt->input->cur - ctxt->input->base;
1831
1832 ctxt->input->buf->raw = xmlBufferCreate();
1833 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1834 ctxt->input->length - processed);
1835 ctxt->input->buf->buffer = xmlBufferCreate();
1836
1837 /*
1838 * convert as much as possible of the raw input
1839 * to the parser reading buffer.
1840 */
1841 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1842 ctxt->input->buf->buffer,
1843 ctxt->input->buf->raw);
1844 if (nbchars < 0) {
1845 xmlGenericError(xmlGenericErrorContext,
1846 "xmlSwitchToEncoding: encoder error\n");
1847 return(-1);
1848 }
1849
1850 /*
1851 * Conversion succeeded, get rid of the old buffer
1852 */
1853 if ((ctxt->input->free != NULL) &&
1854 (ctxt->input->base != NULL))
1855 ctxt->input->free((xmlChar *) ctxt->input->base);
1856 ctxt->input->base =
1857 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001858 ctxt->input->end =
1859 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001860 }
1861 }
1862 } else {
1863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1864 ctxt->sax->error(ctxt->userData,
1865 "xmlSwitchEncoding : no input\n");
1866 return(-1);
1867 }
1868 /*
1869 * The parsing is now done in UTF8 natively
1870 */
1871 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1872 } else
1873 return(-1);
1874 return(0);
1875
1876}
1877
1878/************************************************************************
1879 * *
1880 * Commodity functions to handle entities processing *
1881 * *
1882 ************************************************************************/
1883
1884/**
1885 * xmlFreeInputStream:
1886 * @input: an xmlParserInputPtr
1887 *
1888 * Free up an input stream.
1889 */
1890void
1891xmlFreeInputStream(xmlParserInputPtr input) {
1892 if (input == NULL) return;
1893
1894 if (input->filename != NULL) xmlFree((char *) input->filename);
1895 if (input->directory != NULL) xmlFree((char *) input->directory);
1896 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1897 if (input->version != NULL) xmlFree((char *) input->version);
1898 if ((input->free != NULL) && (input->base != NULL))
1899 input->free((xmlChar *) input->base);
1900 if (input->buf != NULL)
1901 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001902 xmlFree(input);
1903}
1904
1905/**
1906 * xmlNewInputStream:
1907 * @ctxt: an XML parser context
1908 *
1909 * Create a new input stream structure
1910 * Returns the new input stream or NULL
1911 */
1912xmlParserInputPtr
1913xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1914 xmlParserInputPtr input;
1915
1916 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1917 if (input == NULL) {
1918 if (ctxt != NULL) {
1919 ctxt->errNo = XML_ERR_NO_MEMORY;
1920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1921 ctxt->sax->error(ctxt->userData,
1922 "malloc: couldn't allocate a new input stream\n");
1923 ctxt->errNo = XML_ERR_NO_MEMORY;
1924 }
1925 return(NULL);
1926 }
1927 memset(input, 0, sizeof(xmlParserInput));
1928 input->line = 1;
1929 input->col = 1;
1930 input->standalone = -1;
1931 return(input);
1932}
1933
1934/**
1935 * xmlNewIOInputStream:
1936 * @ctxt: an XML parser context
1937 * @input: an I/O Input
1938 * @enc: the charset encoding if known
1939 *
1940 * Create a new input stream structure encapsulating the @input into
1941 * a stream suitable for the parser.
1942 *
1943 * Returns the new input stream or NULL
1944 */
1945xmlParserInputPtr
1946xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1947 xmlCharEncoding enc) {
1948 xmlParserInputPtr inputStream;
1949
1950 if (xmlParserDebugEntities)
1951 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1952 inputStream = xmlNewInputStream(ctxt);
1953 if (inputStream == NULL) {
1954 return(NULL);
1955 }
1956 inputStream->filename = NULL;
1957 inputStream->buf = input;
1958 inputStream->base = inputStream->buf->buffer->content;
1959 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001960 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001961 if (enc != XML_CHAR_ENCODING_NONE) {
1962 xmlSwitchEncoding(ctxt, enc);
1963 }
1964
1965 return(inputStream);
1966}
1967
1968/**
1969 * xmlNewEntityInputStream:
1970 * @ctxt: an XML parser context
1971 * @entity: an Entity pointer
1972 *
1973 * Create a new input stream based on an xmlEntityPtr
1974 *
1975 * Returns the new input stream or NULL
1976 */
1977xmlParserInputPtr
1978xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1979 xmlParserInputPtr input;
1980
1981 if (entity == NULL) {
1982 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984 ctxt->sax->error(ctxt->userData,
1985 "internal: xmlNewEntityInputStream entity = NULL\n");
1986 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1987 return(NULL);
1988 }
1989 if (xmlParserDebugEntities)
1990 xmlGenericError(xmlGenericErrorContext,
1991 "new input from entity: %s\n", entity->name);
1992 if (entity->content == NULL) {
1993 switch (entity->etype) {
1994 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1995 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "xmlNewEntityInputStream unparsed entity !\n");
1999 break;
2000 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2001 case XML_EXTERNAL_PARAMETER_ENTITY:
2002 return(xmlLoadExternalEntity((char *) entity->URI,
2003 (char *) entity->ExternalID, ctxt));
2004 case XML_INTERNAL_GENERAL_ENTITY:
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "Internal entity %s without content !\n", entity->name);
2008 break;
2009 case XML_INTERNAL_PARAMETER_ENTITY:
2010 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2012 ctxt->sax->error(ctxt->userData,
2013 "Internal parameter entity %s without content !\n", entity->name);
2014 break;
2015 case XML_INTERNAL_PREDEFINED_ENTITY:
2016 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2018 ctxt->sax->error(ctxt->userData,
2019 "Predefined entity %s without content !\n", entity->name);
2020 break;
2021 }
2022 return(NULL);
2023 }
2024 input = xmlNewInputStream(ctxt);
2025 if (input == NULL) {
2026 return(NULL);
2027 }
2028 input->filename = (char *) entity->URI;
2029 input->base = entity->content;
2030 input->cur = entity->content;
2031 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002032 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002033 return(input);
2034}
2035
2036/**
2037 * xmlNewStringInputStream:
2038 * @ctxt: an XML parser context
2039 * @buffer: an memory buffer
2040 *
2041 * Create a new input stream based on a memory buffer.
2042 * Returns the new input stream
2043 */
2044xmlParserInputPtr
2045xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2046 xmlParserInputPtr input;
2047
2048 if (buffer == NULL) {
2049 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2051 ctxt->sax->error(ctxt->userData,
2052 "internal: xmlNewStringInputStream string = NULL\n");
2053 return(NULL);
2054 }
2055 if (xmlParserDebugEntities)
2056 xmlGenericError(xmlGenericErrorContext,
2057 "new fixed input: %.30s\n", buffer);
2058 input = xmlNewInputStream(ctxt);
2059 if (input == NULL) {
2060 return(NULL);
2061 }
2062 input->base = buffer;
2063 input->cur = buffer;
2064 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002065 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002066 return(input);
2067}
2068
2069/**
2070 * xmlNewInputFromFile:
2071 * @ctxt: an XML parser context
2072 * @filename: the filename to use as entity
2073 *
2074 * Create a new input stream based on a file.
2075 *
2076 * Returns the new input stream or NULL in case of error
2077 */
2078xmlParserInputPtr
2079xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2080 xmlParserInputBufferPtr buf;
2081 xmlParserInputPtr inputStream;
2082 char *directory = NULL;
2083 xmlChar *URI = NULL;
2084
2085 if (xmlParserDebugEntities)
2086 xmlGenericError(xmlGenericErrorContext,
2087 "new input from file: %s\n", filename);
2088 if (ctxt == NULL) return(NULL);
2089 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2090 if (buf == NULL)
2091 return(NULL);
2092
2093 URI = xmlStrdup((xmlChar *) filename);
2094 directory = xmlParserGetDirectory((const char *) URI);
2095
2096 inputStream = xmlNewInputStream(ctxt);
2097 if (inputStream == NULL) {
2098 if (directory != NULL) xmlFree((char *) directory);
2099 if (URI != NULL) xmlFree((char *) URI);
2100 return(NULL);
2101 }
2102
2103 inputStream->filename = (const char *) URI;
2104 inputStream->directory = directory;
2105 inputStream->buf = buf;
2106
2107 inputStream->base = inputStream->buf->buffer->content;
2108 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002109 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002110 if ((ctxt->directory == NULL) && (directory != NULL))
2111 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2112 return(inputStream);
2113}
2114
2115/************************************************************************
2116 * *
2117 * Commodity functions to handle parser contexts *
2118 * *
2119 ************************************************************************/
2120
2121/**
2122 * xmlInitParserCtxt:
2123 * @ctxt: an XML parser context
2124 *
2125 * Initialize a parser context
2126 */
2127
2128void
2129xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2130{
2131 xmlSAXHandler *sax;
2132
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002133 if(ctxt==NULL) {
2134 xmlGenericError(xmlGenericErrorContext,
2135 "xmlInitParserCtxt: NULL context given\n");
2136 return;
2137 }
2138
Owen Taylor3473f882001-02-23 17:55:21 +00002139 xmlDefaultSAXHandlerInit();
2140
2141 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2142 if (sax == NULL) {
2143 xmlGenericError(xmlGenericErrorContext,
2144 "xmlInitParserCtxt: out of memory\n");
2145 }
2146 else
2147 memset(sax, 0, sizeof(xmlSAXHandler));
2148
2149 /* Allocate the Input stack */
2150 ctxt->inputTab = (xmlParserInputPtr *)
2151 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2152 if (ctxt->inputTab == NULL) {
2153 xmlGenericError(xmlGenericErrorContext,
2154 "xmlInitParserCtxt: out of memory\n");
2155 ctxt->inputNr = 0;
2156 ctxt->inputMax = 0;
2157 ctxt->input = NULL;
2158 return;
2159 }
2160 ctxt->inputNr = 0;
2161 ctxt->inputMax = 5;
2162 ctxt->input = NULL;
2163
2164 ctxt->version = NULL;
2165 ctxt->encoding = NULL;
2166 ctxt->standalone = -1;
2167 ctxt->hasExternalSubset = 0;
2168 ctxt->hasPErefs = 0;
2169 ctxt->html = 0;
2170 ctxt->external = 0;
2171 ctxt->instate = XML_PARSER_START;
2172 ctxt->token = 0;
2173 ctxt->directory = NULL;
2174
2175 /* Allocate the Node stack */
2176 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2177 if (ctxt->nodeTab == NULL) {
2178 xmlGenericError(xmlGenericErrorContext,
2179 "xmlInitParserCtxt: out of memory\n");
2180 ctxt->nodeNr = 0;
2181 ctxt->nodeMax = 0;
2182 ctxt->node = NULL;
2183 ctxt->inputNr = 0;
2184 ctxt->inputMax = 0;
2185 ctxt->input = NULL;
2186 return;
2187 }
2188 ctxt->nodeNr = 0;
2189 ctxt->nodeMax = 10;
2190 ctxt->node = NULL;
2191
2192 /* Allocate the Name stack */
2193 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2194 if (ctxt->nameTab == NULL) {
2195 xmlGenericError(xmlGenericErrorContext,
2196 "xmlInitParserCtxt: out of memory\n");
2197 ctxt->nodeNr = 0;
2198 ctxt->nodeMax = 0;
2199 ctxt->node = NULL;
2200 ctxt->inputNr = 0;
2201 ctxt->inputMax = 0;
2202 ctxt->input = NULL;
2203 ctxt->nameNr = 0;
2204 ctxt->nameMax = 0;
2205 ctxt->name = NULL;
2206 return;
2207 }
2208 ctxt->nameNr = 0;
2209 ctxt->nameMax = 10;
2210 ctxt->name = NULL;
2211
2212 /* Allocate the space stack */
2213 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2214 if (ctxt->spaceTab == NULL) {
2215 xmlGenericError(xmlGenericErrorContext,
2216 "xmlInitParserCtxt: out of memory\n");
2217 ctxt->nodeNr = 0;
2218 ctxt->nodeMax = 0;
2219 ctxt->node = NULL;
2220 ctxt->inputNr = 0;
2221 ctxt->inputMax = 0;
2222 ctxt->input = NULL;
2223 ctxt->nameNr = 0;
2224 ctxt->nameMax = 0;
2225 ctxt->name = NULL;
2226 ctxt->spaceNr = 0;
2227 ctxt->spaceMax = 0;
2228 ctxt->space = NULL;
2229 return;
2230 }
2231 ctxt->spaceNr = 1;
2232 ctxt->spaceMax = 10;
2233 ctxt->spaceTab[0] = -1;
2234 ctxt->space = &ctxt->spaceTab[0];
2235
Daniel Veillard14be0a12001-03-03 18:50:55 +00002236 ctxt->sax = sax;
2237 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2238
Owen Taylor3473f882001-02-23 17:55:21 +00002239 ctxt->userData = ctxt;
2240 ctxt->myDoc = NULL;
2241 ctxt->wellFormed = 1;
2242 ctxt->valid = 1;
2243 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2244 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2245 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002246 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002247 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002248 if (ctxt->keepBlanks == 0)
2249 sax->ignorableWhitespace = ignorableWhitespace;
2250
Owen Taylor3473f882001-02-23 17:55:21 +00002251 ctxt->vctxt.userData = ctxt;
2252 if (ctxt->validate) {
2253 ctxt->vctxt.error = xmlParserValidityError;
2254 if (xmlGetWarningsDefaultValue == 0)
2255 ctxt->vctxt.warning = NULL;
2256 else
2257 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002258 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002259 } else {
2260 ctxt->vctxt.error = NULL;
2261 ctxt->vctxt.warning = NULL;
2262 }
2263 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2264 ctxt->record_info = 0;
2265 ctxt->nbChars = 0;
2266 ctxt->checkIndex = 0;
2267 ctxt->inSubset = 0;
2268 ctxt->errNo = XML_ERR_OK;
2269 ctxt->depth = 0;
2270 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002271 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002272 xmlInitNodeInfoSeq(&ctxt->node_seq);
2273}
2274
2275/**
2276 * xmlFreeParserCtxt:
2277 * @ctxt: an XML parser context
2278 *
2279 * Free all the memory used by a parser context. However the parsed
2280 * document in ctxt->myDoc is not freed.
2281 */
2282
2283void
2284xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2285{
2286 xmlParserInputPtr input;
2287 xmlChar *oldname;
2288
2289 if (ctxt == NULL) return;
2290
2291 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2292 xmlFreeInputStream(input);
2293 }
2294 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2295 xmlFree(oldname);
2296 }
2297 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2298 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2299 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2300 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2301 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2302 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2303 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2304 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2305 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002306 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2307 xmlFree(ctxt->sax);
2308 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002309 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002310#ifdef LIBXML_CATALOG_ENABLED
2311 if (ctxt->catalogs != NULL)
2312 xmlCatalogFreeLocal(ctxt->catalogs);
2313#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002314 xmlFree(ctxt);
2315}
2316
2317/**
2318 * xmlNewParserCtxt:
2319 *
2320 * Allocate and initialize a new parser context.
2321 *
2322 * Returns the xmlParserCtxtPtr or NULL
2323 */
2324
2325xmlParserCtxtPtr
2326xmlNewParserCtxt()
2327{
2328 xmlParserCtxtPtr ctxt;
2329
2330 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2331 if (ctxt == NULL) {
2332 xmlGenericError(xmlGenericErrorContext,
2333 "xmlNewParserCtxt : cannot allocate context\n");
2334 perror("malloc");
2335 return(NULL);
2336 }
2337 memset(ctxt, 0, sizeof(xmlParserCtxt));
2338 xmlInitParserCtxt(ctxt);
2339 return(ctxt);
2340}
2341
2342/************************************************************************
2343 * *
 *              Handling of node information                            *
2345 * *
2346 ************************************************************************/
2347
2348/**
2349 * xmlClearParserCtxt:
2350 * @ctxt: an XML parser context
2351 *
2352 * Clear (release owned resources) and reinitialize a parser context
2353 */
2354
2355void
2356xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2357{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002358 if (ctxt==NULL)
2359 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002360 xmlClearNodeInfoSeq(&ctxt->node_seq);
2361 xmlInitParserCtxt(ctxt);
2362}
2363
2364/**
2365 * xmlParserFindNodeInfo:
2366 * @ctxt: an XML parser context
2367 * @node: an XML node within the tree
2368 *
2369 * Find the parser node info struct for a given node
2370 *
2371 * Returns an xmlParserNodeInfo block pointer or NULL
2372 */
2373const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2374 const xmlNode* node)
2375{
2376 unsigned long pos;
2377
2378 /* Find position where node should be at */
2379 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002380 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002381 return &ctx->node_seq.buffer[pos];
2382 else
2383 return NULL;
2384}
2385
2386
2387/**
2388 * xmlInitNodeInfoSeq:
2389 * @seq: a node info sequence pointer
2390 *
2391 * -- Initialize (set to initial state) node info sequence
2392 */
2393void
2394xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2395{
2396 seq->length = 0;
2397 seq->maximum = 0;
2398 seq->buffer = NULL;
2399}
2400
2401/**
2402 * xmlClearNodeInfoSeq:
2403 * @seq: a node info sequence pointer
2404 *
2405 * -- Clear (release memory and reinitialize) node
2406 * info sequence
2407 */
2408void
2409xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2410{
2411 if ( seq->buffer != NULL )
2412 xmlFree(seq->buffer);
2413 xmlInitNodeInfoSeq(seq);
2414}
2415
2416
2417/**
2418 * xmlParserFindNodeInfoIndex:
2419 * @seq: a node info sequence pointer
2420 * @node: an XML node pointer
2421 *
2422 *
2423 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2424 * the given node is or should be at in a sorted sequence
2425 *
2426 * Returns a long indicating the position of the record
2427 */
2428unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2429 const xmlNode* node)
2430{
2431 unsigned long upper, lower, middle;
2432 int found = 0;
2433
2434 /* Do a binary search for the key */
2435 lower = 1;
2436 upper = seq->length;
2437 middle = 0;
2438 while ( lower <= upper && !found) {
2439 middle = lower + (upper - lower) / 2;
2440 if ( node == seq->buffer[middle - 1].node )
2441 found = 1;
2442 else if ( node < seq->buffer[middle - 1].node )
2443 upper = middle - 1;
2444 else
2445 lower = middle + 1;
2446 }
2447
2448 /* Return position */
2449 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2450 return middle;
2451 else
2452 return middle - 1;
2453}
2454
2455
2456/**
2457 * xmlParserAddNodeInfo:
2458 * @ctxt: an XML parser context
2459 * @info: a node info sequence pointer
2460 *
2461 * Insert node info record into the sorted sequence
2462 */
2463void
2464xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2465 const xmlParserNodeInfo* info)
2466{
2467 unsigned long pos;
2468 static unsigned int block_size = 5;
2469
2470 /* Find pos and check to see if node is already in the sequence */
2471 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2472 if ( pos < ctxt->node_seq.length
2473 && ctxt->node_seq.buffer[pos].node == info->node ) {
2474 ctxt->node_seq.buffer[pos] = *info;
2475 }
2476
2477 /* Otherwise, we need to add new node to buffer */
2478 else {
2479 /* Expand buffer by 5 if needed */
2480 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2481 xmlParserNodeInfo* tmp_buffer;
2482 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2483 *(ctxt->node_seq.maximum + block_size));
2484
2485 if ( ctxt->node_seq.buffer == NULL )
2486 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2487 else
2488 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2489
2490 if ( tmp_buffer == NULL ) {
2491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2492 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2493 ctxt->errNo = XML_ERR_NO_MEMORY;
2494 return;
2495 }
2496 ctxt->node_seq.buffer = tmp_buffer;
2497 ctxt->node_seq.maximum += block_size;
2498 }
2499
2500 /* If position is not at end, move elements out of the way */
2501 if ( pos != ctxt->node_seq.length ) {
2502 unsigned long i;
2503
2504 for ( i = ctxt->node_seq.length; i > pos; i-- )
2505 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2506 }
2507
2508 /* Copy element and increase length */
2509 ctxt->node_seq.buffer[pos] = *info;
2510 ctxt->node_seq.length++;
2511 }
2512}
2513
2514/************************************************************************
2515 * *
 *              Default settings                                        *
2517 * *
2518 ************************************************************************/
2519/**
2520 * xmlPedanticParserDefault:
2521 * @val: int 0 or 1
2522 *
2523 * Set and return the previous value for enabling pedantic warnings.
2524 *
2525 * Returns the last value for 0 for no substitution, 1 for substitution.
2526 */
2527
2528int
2529xmlPedanticParserDefault(int val) {
2530 int old = xmlPedanticParserDefaultValue;
2531
2532 xmlPedanticParserDefaultValue = val;
2533 return(old);
2534}
2535
2536/**
2537 * xmlLineNumbersDefault:
2538 * @val: int 0 or 1
2539 *
2540 * Set and return the previous value for enabling line numbers in elements
2541 * contents. This may break on old application and is turned off by default.
2542 *
2543 * Returns the last value for 0 for no substitution, 1 for substitution.
2544 */
2545
2546int
2547xmlLineNumbersDefault(int val) {
2548 int old = xmlLineNumbersDefaultValue;
2549
2550 xmlLineNumbersDefaultValue = val;
2551 return(old);
2552}
2553
2554/**
2555 * xmlSubstituteEntitiesDefault:
2556 * @val: int 0 or 1
2557 *
2558 * Set and return the previous value for default entity support.
2559 * Initially the parser always keep entity references instead of substituting
2560 * entity values in the output. This function has to be used to change the
2561 * default parser behaviour
2562 * SAX::subtituteEntities() has to be used for changing that on a file by
2563 * file basis.
2564 *
2565 * Returns the last value for 0 for no substitution, 1 for substitution.
2566 */
2567
2568int
2569xmlSubstituteEntitiesDefault(int val) {
2570 int old = xmlSubstituteEntitiesDefaultValue;
2571
2572 xmlSubstituteEntitiesDefaultValue = val;
2573 return(old);
2574}
2575
2576/**
2577 * xmlKeepBlanksDefault:
2578 * @val: int 0 or 1
2579 *
2580 * Set and return the previous value for default blanks text nodes support.
2581 * The 1.x version of the parser used an heuristic to try to detect
2582 * ignorable white spaces. As a result the SAX callback was generating
2583 * ignorableWhitespace() callbacks instead of characters() one, and when
2584 * using the DOM output text nodes containing those blanks were not generated.
2585 * The 2.x and later version will switch to the XML standard way and
2586 * ignorableWhitespace() are only generated when running the parser in
2587 * validating mode and when the current element doesn't allow CDATA or
2588 * mixed content.
2589 * This function is provided as a way to force the standard behaviour
2590 * on 1.X libs and to switch back to the old mode for compatibility when
2591 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2592 * by using xmlIsBlankNode() commodity function to detect the "empty"
2593 * nodes generated.
2594 * This value also affect autogeneration of indentation when saving code
2595 * if blanks sections are kept, indentation is not generated.
2596 *
2597 * Returns the last value for 0 for no substitution, 1 for substitution.
2598 */
2599
2600int
2601xmlKeepBlanksDefault(int val) {
2602 int old = xmlKeepBlanksDefaultValue;
2603
2604 xmlKeepBlanksDefaultValue = val;
2605 xmlIndentTreeOutput = !val;
2606 return(old);
2607}
2608
2609/************************************************************************
2610 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002611 * Deprecated functions kept for compatibility *
2612 * *
2613 ************************************************************************/
2614
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002615/**
2616 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002617 * @lang: pointer to the string value
2618 *
2619 * Checks that the value conforms to the LanguageID production:
2620 *
2621 * NOTE: this is somewhat deprecated, those productions were removed from
2622 * the XML Second edition.
2623 *
2624 * [33] LanguageID ::= Langcode ('-' Subcode)*
2625 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2626 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2627 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2628 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2629 * [38] Subcode ::= ([a-z] | [A-Z])+
2630 *
2631 * Returns 1 if correct 0 otherwise
2632 **/
2633int
2634xmlCheckLanguageID(const xmlChar *lang) {
2635 const xmlChar *cur = lang;
2636
2637 if (cur == NULL)
2638 return(0);
2639 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2640 ((cur[0] == 'I') && (cur[1] == '-'))) {
2641 /*
2642 * IANA code
2643 */
2644 cur += 2;
2645 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2646 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2647 cur++;
2648 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2649 ((cur[0] == 'X') && (cur[1] == '-'))) {
2650 /*
2651 * User code
2652 */
2653 cur += 2;
2654 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2655 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2656 cur++;
2657 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2658 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2659 /*
2660 * ISO639
2661 */
2662 cur++;
2663 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2664 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2665 cur++;
2666 else
2667 return(0);
2668 } else
2669 return(0);
2670 while (cur[0] != 0) { /* non input consuming */
2671 if (cur[0] != '-')
2672 return(0);
2673 cur++;
2674 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2675 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2676 cur++;
2677 else
2678 return(0);
2679 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2680 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2681 cur++;
2682 }
2683 return(1);
2684}
2685
2686/**
2687 * xmlDecodeEntities:
2688 * @ctxt: the parser context
2689 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2690 * @len: the len to decode (in bytes !), -1 for no size limit
2691 * @end: an end marker xmlChar, 0 if none
2692 * @end2: an end marker xmlChar, 0 if none
2693 * @end3: an end marker xmlChar, 0 if none
2694 *
2695 * This function is deprecated, we now always process entities content
2696 * through xmlStringDecodeEntities
2697 *
2698 * TODO: remove it in next major release.
2699 *
2700 * [67] Reference ::= EntityRef | CharRef
2701 *
2702 * [69] PEReference ::= '%' Name ';'
2703 *
2704 * Returns A newly allocated string with the substitution done. The caller
2705 * must deallocate it !
2706 */
2707xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002708xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2709 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002710#if 0
2711 xmlChar *buffer = NULL;
2712 unsigned int buffer_size = 0;
2713 unsigned int nbchars = 0;
2714
2715 xmlChar *current = NULL;
2716 xmlEntityPtr ent;
2717 unsigned int max = (unsigned int) len;
2718 int c,l;
2719#endif
2720
2721 static int deprecated = 0;
2722 if (!deprecated) {
2723 xmlGenericError(xmlGenericErrorContext,
2724 "xmlDecodeEntities() deprecated function reached\n");
2725 deprecated = 1;
2726 }
2727
2728#if 0
2729 if (ctxt->depth > 40) {
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData,
2732 "Detected entity reference loop\n");
2733 ctxt->wellFormed = 0;
2734 ctxt->disableSAX = 1;
2735 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2736 return(NULL);
2737 }
2738
2739 /*
2740 * allocate a translation buffer.
2741 */
2742 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2743 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2744 if (buffer == NULL) {
2745 perror("xmlDecodeEntities: malloc failed");
2746 return(NULL);
2747 }
2748
2749 /*
2750 * Ok loop until we reach one of the ending char or a size limit.
2751 */
2752 GROW;
2753 c = CUR_CHAR(l);
2754 while ((nbchars < max) && (c != end) && /* NOTUSED */
2755 (c != end2) && (c != end3)) {
2756 GROW;
2757 if (c == 0) break;
2758 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2759 int val = xmlParseCharRef(ctxt);
2760 COPY_BUF(0,buffer,nbchars,val);
2761 NEXTL(l);
2762 } else if ((c == '&') && (ctxt->token != '&') &&
2763 (what & XML_SUBSTITUTE_REF)) {
2764 if (xmlParserDebugEntities)
2765 xmlGenericError(xmlGenericErrorContext,
2766 "decoding Entity Reference\n");
2767 ent = xmlParseEntityRef(ctxt);
2768 if ((ent != NULL) &&
2769 (ctxt->replaceEntities != 0)) {
2770 current = ent->content;
2771 while (*current != 0) { /* non input consuming loop */
2772 buffer[nbchars++] = *current++;
2773 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2774 growBuffer(buffer);
2775 }
2776 }
2777 } else if (ent != NULL) {
2778 const xmlChar *cur = ent->name;
2779
2780 buffer[nbchars++] = '&';
2781 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2782 growBuffer(buffer);
2783 }
2784 while (*cur != 0) { /* non input consuming loop */
2785 buffer[nbchars++] = *cur++;
2786 }
2787 buffer[nbchars++] = ';';
2788 }
2789 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2790 /*
2791 * a PEReference induce to switch the entity flow,
2792 * we break here to flush the current set of chars
2793 * parsed if any. We will be called back later.
2794 */
2795 if (xmlParserDebugEntities)
2796 xmlGenericError(xmlGenericErrorContext,
2797 "decoding PE Reference\n");
2798 if (nbchars != 0) break;
2799
2800 xmlParsePEReference(ctxt);
2801
2802 /*
2803 * Pop-up of finished entities.
2804 */
2805 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2806 xmlPopInput(ctxt);
2807
2808 break;
2809 } else {
2810 COPY_BUF(l,buffer,nbchars,c);
2811 NEXTL(l);
2812 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2813 growBuffer(buffer);
2814 }
2815 }
2816 c = CUR_CHAR(l);
2817 }
2818 buffer[nbchars++] = 0;
2819 return(buffer);
2820#endif
2821 return(NULL);
2822}
2823
2824/**
2825 * xmlNamespaceParseNCName:
2826 * @ctxt: an XML parser context
2827 *
2828 * parse an XML namespace name.
2829 *
2830 * TODO: this seems not in use anymore, the namespace handling is done on
2831 * top of the SAX interfaces, i.e. not on raw input.
2832 *
2833 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2834 *
2835 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2836 * CombiningChar | Extender
2837 *
2838 * Returns the namespace name or NULL
2839 */
2840
2841xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002842xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002843#if 0
2844 xmlChar buf[XML_MAX_NAMELEN + 5];
2845 int len = 0, l;
2846 int cur = CUR_CHAR(l);
2847#endif
2848
2849 static int deprecated = 0;
2850 if (!deprecated) {
2851 xmlGenericError(xmlGenericErrorContext,
2852 "xmlNamespaceParseNCName() deprecated function reached\n");
2853 deprecated = 1;
2854 }
2855
2856#if 0
2857 /* load first the value of the char !!! */
2858 GROW;
2859 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2860
2861xmlGenericError(xmlGenericErrorContext,
2862 "xmlNamespaceParseNCName: reached loop 3\n");
2863 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2864 (cur == '.') || (cur == '-') ||
2865 (cur == '_') ||
2866 (IS_COMBINING(cur)) ||
2867 (IS_EXTENDER(cur))) {
2868 COPY_BUF(l,buf,len,cur);
2869 NEXTL(l);
2870 cur = CUR_CHAR(l);
2871 if (len >= XML_MAX_NAMELEN) {
2872 xmlGenericError(xmlGenericErrorContext,
2873 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2874 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2875 (cur == '.') || (cur == '-') ||
2876 (cur == '_') ||
2877 (IS_COMBINING(cur)) ||
2878 (IS_EXTENDER(cur))) {
2879 NEXTL(l);
2880 cur = CUR_CHAR(l);
2881 }
2882 break;
2883 }
2884 }
2885 return(xmlStrndup(buf, len));
2886#endif
2887 return(NULL);
2888}
2889
2890/**
2891 * xmlNamespaceParseQName:
2892 * @ctxt: an XML parser context
2893 * @prefix: a xmlChar **
2894 *
2895 * TODO: this seems not in use anymore, the namespace handling is done on
2896 * top of the SAX interfaces, i.e. not on raw input.
2897 *
2898 * parse an XML qualified name
2899 *
2900 * [NS 5] QName ::= (Prefix ':')? LocalPart
2901 *
2902 * [NS 6] Prefix ::= NCName
2903 *
2904 * [NS 7] LocalPart ::= NCName
2905 *
2906 * Returns the local part, and prefix is updated
2907 * to get the Prefix if any.
2908 */
2909
2910xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002911xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002912
2913 static int deprecated = 0;
2914 if (!deprecated) {
2915 xmlGenericError(xmlGenericErrorContext,
2916 "xmlNamespaceParseQName() deprecated function reached\n");
2917 deprecated = 1;
2918 }
2919
2920#if 0
2921 xmlChar *ret = NULL;
2922
2923 *prefix = NULL;
2924 ret = xmlNamespaceParseNCName(ctxt);
2925 if (RAW == ':') {
2926 *prefix = ret;
2927 NEXT;
2928 ret = xmlNamespaceParseNCName(ctxt);
2929 }
2930
2931 return(ret);
2932#endif
2933 return(NULL);
2934}
2935
2936/**
2937 * xmlNamespaceParseNSDef:
2938 * @ctxt: an XML parser context
2939 *
2940 * parse a namespace prefix declaration
2941 *
2942 * TODO: this seems not in use anymore, the namespace handling is done on
2943 * top of the SAX interfaces, i.e. not on raw input.
2944 *
2945 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2946 *
2947 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2948 *
2949 * Returns the namespace name
2950 */
2951
2952xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002953xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002954 static int deprecated = 0;
2955 if (!deprecated) {
2956 xmlGenericError(xmlGenericErrorContext,
2957 "xmlNamespaceParseNSDef() deprecated function reached\n");
2958 deprecated = 1;
2959 }
2960 return(NULL);
2961#if 0
2962 xmlChar *name = NULL;
2963
2964 if ((RAW == 'x') && (NXT(1) == 'm') &&
2965 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2966 (NXT(4) == 's')) {
2967 SKIP(5);
2968 if (RAW == ':') {
2969 NEXT;
2970 name = xmlNamespaceParseNCName(ctxt);
2971 }
2972 }
2973 return(name);
2974#endif
2975}
2976
2977/**
2978 * xmlParseQuotedString:
2979 * @ctxt: an XML parser context
2980 *
2981 * Parse and return a string between quotes or doublequotes
2982 *
2983 * TODO: Deprecated, to be removed at next drop of binary compatibility
2984 *
2985 * Returns the string parser or NULL.
2986 */
2987xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002988xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002989 static int deprecated = 0;
2990 if (!deprecated) {
2991 xmlGenericError(xmlGenericErrorContext,
2992 "xmlParseQuotedString() deprecated function reached\n");
2993 deprecated = 1;
2994 }
2995 return(NULL);
2996
2997#if 0
2998 xmlChar *buf = NULL;
2999 int len = 0,l;
3000 int size = XML_PARSER_BUFFER_SIZE;
3001 int c;
3002
3003 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3004 if (buf == NULL) {
3005 xmlGenericError(xmlGenericErrorContext,
3006 "malloc of %d byte failed\n", size);
3007 return(NULL);
3008 }
3009xmlGenericError(xmlGenericErrorContext,
3010 "xmlParseQuotedString: reached loop 4\n");
3011 if (RAW == '"') {
3012 NEXT;
3013 c = CUR_CHAR(l);
3014 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3015 if (len + 5 >= size) {
3016 size *= 2;
3017 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3018 if (buf == NULL) {
3019 xmlGenericError(xmlGenericErrorContext,
3020 "realloc of %d byte failed\n", size);
3021 return(NULL);
3022 }
3023 }
3024 COPY_BUF(l,buf,len,c);
3025 NEXTL(l);
3026 c = CUR_CHAR(l);
3027 }
3028 if (c != '"') {
3029 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3031 ctxt->sax->error(ctxt->userData,
3032 "String not closed \"%.50s\"\n", buf);
3033 ctxt->wellFormed = 0;
3034 ctxt->disableSAX = 1;
3035 } else {
3036 NEXT;
3037 }
3038 } else if (RAW == '\''){
3039 NEXT;
3040 c = CUR;
3041 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3042 if (len + 1 >= size) {
3043 size *= 2;
3044 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3045 if (buf == NULL) {
3046 xmlGenericError(xmlGenericErrorContext,
3047 "realloc of %d byte failed\n", size);
3048 return(NULL);
3049 }
3050 }
3051 buf[len++] = c;
3052 NEXT;
3053 c = CUR;
3054 }
3055 if (RAW != '\'') {
3056 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3058 ctxt->sax->error(ctxt->userData,
3059 "String not closed \"%.50s\"\n", buf);
3060 ctxt->wellFormed = 0;
3061 ctxt->disableSAX = 1;
3062 } else {
3063 NEXT;
3064 }
3065 }
3066 return(buf);
3067#endif
3068}
3069
3070/**
3071 * xmlParseNamespace:
3072 * @ctxt: an XML parser context
3073 *
3074 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3075 *
3076 * This is what the older xml-name Working Draft specified, a bunch of
3077 * other stuff may still rely on it, so support is still here as
3078 * if it was declared on the root of the Tree:-(
3079 *
3080 * TODO: remove from library
3081 *
3082 * To be removed at next drop of binary compatibility
3083 */
3084
3085void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003086xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003087 static int deprecated = 0;
3088 if (!deprecated) {
3089 xmlGenericError(xmlGenericErrorContext,
3090 "xmlParseNamespace() deprecated function reached\n");
3091 deprecated = 1;
3092 }
3093
3094#if 0
3095 xmlChar *href = NULL;
3096 xmlChar *prefix = NULL;
3097 int garbage = 0;
3098
3099 /*
3100 * We just skipped "namespace" or "xml:namespace"
3101 */
3102 SKIP_BLANKS;
3103
3104xmlGenericError(xmlGenericErrorContext,
3105 "xmlParseNamespace: reached loop 5\n");
3106 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3107 /*
3108 * We can have "ns" or "prefix" attributes
3109 * Old encoding as 'href' or 'AS' attributes is still supported
3110 */
3111 if ((RAW == 'n') && (NXT(1) == 's')) {
3112 garbage = 0;
3113 SKIP(2);
3114 SKIP_BLANKS;
3115
3116 if (RAW != '=') continue;
3117 NEXT;
3118 SKIP_BLANKS;
3119
3120 href = xmlParseQuotedString(ctxt);
3121 SKIP_BLANKS;
3122 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3123 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3124 garbage = 0;
3125 SKIP(4);
3126 SKIP_BLANKS;
3127
3128 if (RAW != '=') continue;
3129 NEXT;
3130 SKIP_BLANKS;
3131
3132 href = xmlParseQuotedString(ctxt);
3133 SKIP_BLANKS;
3134 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3135 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3136 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3137 garbage = 0;
3138 SKIP(6);
3139 SKIP_BLANKS;
3140
3141 if (RAW != '=') continue;
3142 NEXT;
3143 SKIP_BLANKS;
3144
3145 prefix = xmlParseQuotedString(ctxt);
3146 SKIP_BLANKS;
3147 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3148 garbage = 0;
3149 SKIP(2);
3150 SKIP_BLANKS;
3151
3152 if (RAW != '=') continue;
3153 NEXT;
3154 SKIP_BLANKS;
3155
3156 prefix = xmlParseQuotedString(ctxt);
3157 SKIP_BLANKS;
3158 } else if ((RAW == '?') && (NXT(1) == '>')) {
3159 garbage = 0;
3160 NEXT;
3161 } else {
3162 /*
3163 * Found garbage when parsing the namespace
3164 */
3165 if (!garbage) {
3166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3167 ctxt->sax->error(ctxt->userData,
3168 "xmlParseNamespace found garbage\n");
3169 }
3170 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3171 ctxt->wellFormed = 0;
3172 ctxt->disableSAX = 1;
3173 NEXT;
3174 }
3175 }
3176
3177 MOVETO_ENDTAG(CUR_PTR);
3178 NEXT;
3179
3180 /*
3181 * Register the DTD.
3182 if (href != NULL)
3183 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3184 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3185 */
3186
3187 if (prefix != NULL) xmlFree(prefix);
3188 if (href != NULL) xmlFree(href);
3189#endif
3190}
3191
3192/**
3193 * xmlScanName:
3194 * @ctxt: an XML parser context
3195 *
3196 * Trickery: parse an XML name but without consuming the input flow
3197 * Needed for rollback cases. Used only when parsing entities references.
3198 *
3199 * TODO: seems deprecated now, only used in the default part of
3200 * xmlParserHandleReference
3201 *
3202 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3203 * CombiningChar | Extender
3204 *
3205 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3206 *
3207 * [6] Names ::= Name (S Name)*
3208 *
3209 * Returns the Name parsed or NULL
3210 */
3211
3212xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003213xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003214 static int deprecated = 0;
3215 if (!deprecated) {
3216 xmlGenericError(xmlGenericErrorContext,
3217 "xmlScanName() deprecated function reached\n");
3218 deprecated = 1;
3219 }
3220 return(NULL);
3221
3222#if 0
3223 xmlChar buf[XML_MAX_NAMELEN];
3224 int len = 0;
3225
3226 GROW;
3227 if (!IS_LETTER(RAW) && (RAW != '_') &&
3228 (RAW != ':')) {
3229 return(NULL);
3230 }
3231
3232
3233 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3234 (NXT(len) == '.') || (NXT(len) == '-') ||
3235 (NXT(len) == '_') || (NXT(len) == ':') ||
3236 (IS_COMBINING(NXT(len))) ||
3237 (IS_EXTENDER(NXT(len)))) {
3238 GROW;
3239 buf[len] = NXT(len);
3240 len++;
3241 if (len >= XML_MAX_NAMELEN) {
3242 xmlGenericError(xmlGenericErrorContext,
3243 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3244 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3245 (IS_DIGIT(NXT(len))) ||
3246 (NXT(len) == '.') || (NXT(len) == '-') ||
3247 (NXT(len) == '_') || (NXT(len) == ':') ||
3248 (IS_COMBINING(NXT(len))) ||
3249 (IS_EXTENDER(NXT(len))))
3250 len++;
3251 break;
3252 }
3253 }
3254 return(xmlStrndup(buf, len));
3255#endif
3256}
3257
3258/**
3259 * xmlParserHandleReference:
3260 * @ctxt: the parser context
3261 *
3262 * TODO: Remove, now deprecated ... the test is done directly in the
3263 * content parsing
3264 * routines.
3265 *
3266 * [67] Reference ::= EntityRef | CharRef
3267 *
3268 * [68] EntityRef ::= '&' Name ';'
3269 *
3270 * [ WFC: Entity Declared ]
3271 * the Name given in the entity reference must match that in an entity
3272 * declaration, except that well-formed documents need not declare any
3273 * of the following entities: amp, lt, gt, apos, quot.
3274 *
3275 * [ WFC: Parsed Entity ]
3276 * An entity reference must not contain the name of an unparsed entity
3277 *
3278 * [66] CharRef ::= '&#' [0-9]+ ';' |
3279 * '&#x' [0-9a-fA-F]+ ';'
3280 *
3281 * A PEReference may have been detectect in the current input stream
3282 * the handling is done accordingly to
3283 * http://www.w3.org/TR/REC-xml#entproc
3284 */
3285void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003286xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003287 static int deprecated = 0;
3288 if (!deprecated) {
3289 xmlGenericError(xmlGenericErrorContext,
3290 "xmlParserHandleReference() deprecated function reached\n");
3291 deprecated = 1;
3292 }
3293
3294#if 0
3295 xmlParserInputPtr input;
3296 xmlChar *name;
3297 xmlEntityPtr ent = NULL;
3298
3299 if (ctxt->token != 0) {
3300 return;
3301 }
3302 if (RAW != '&') return;
3303 GROW;
3304 if ((RAW == '&') && (NXT(1) == '#')) {
3305 switch(ctxt->instate) {
3306 case XML_PARSER_ENTITY_DECL:
3307 case XML_PARSER_PI:
3308 case XML_PARSER_CDATA_SECTION:
3309 case XML_PARSER_COMMENT:
3310 case XML_PARSER_SYSTEM_LITERAL:
3311 /* we just ignore it there */
3312 return;
3313 case XML_PARSER_START_TAG:
3314 return;
3315 case XML_PARSER_END_TAG:
3316 return;
3317 case XML_PARSER_EOF:
3318 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3320 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3321 ctxt->wellFormed = 0;
3322 ctxt->disableSAX = 1;
3323 return;
3324 case XML_PARSER_PROLOG:
3325 case XML_PARSER_START:
3326 case XML_PARSER_MISC:
3327 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3329 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3330 ctxt->wellFormed = 0;
3331 ctxt->disableSAX = 1;
3332 return;
3333 case XML_PARSER_EPILOG:
3334 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3336 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3337 ctxt->wellFormed = 0;
3338 ctxt->disableSAX = 1;
3339 return;
3340 case XML_PARSER_DTD:
3341 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData,
3344 "CharRef are forbiden in DTDs!\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 return;
3348 case XML_PARSER_ENTITY_VALUE:
3349 /*
3350 * NOTE: in the case of entity values, we don't do the
3351 * substitution here since we need the literal
3352 * entity value to be able to save the internal
3353 * subset of the document.
3354 * This will be handled by xmlStringDecodeEntities
3355 */
3356 return;
3357 case XML_PARSER_CONTENT:
3358 return;
3359 case XML_PARSER_ATTRIBUTE_VALUE:
3360 /* ctxt->token = xmlParseCharRef(ctxt); */
3361 return;
3362 case XML_PARSER_IGNORE:
3363 return;
3364 }
3365 return;
3366 }
3367
3368 switch(ctxt->instate) {
3369 case XML_PARSER_CDATA_SECTION:
3370 return;
3371 case XML_PARSER_PI:
3372 case XML_PARSER_COMMENT:
3373 case XML_PARSER_SYSTEM_LITERAL:
3374 case XML_PARSER_CONTENT:
3375 return;
3376 case XML_PARSER_START_TAG:
3377 return;
3378 case XML_PARSER_END_TAG:
3379 return;
3380 case XML_PARSER_EOF:
3381 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3384 ctxt->wellFormed = 0;
3385 ctxt->disableSAX = 1;
3386 return;
3387 case XML_PARSER_PROLOG:
3388 case XML_PARSER_START:
3389 case XML_PARSER_MISC:
3390 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3392 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3393 ctxt->wellFormed = 0;
3394 ctxt->disableSAX = 1;
3395 return;
3396 case XML_PARSER_EPILOG:
3397 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3399 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3400 ctxt->wellFormed = 0;
3401 ctxt->disableSAX = 1;
3402 return;
3403 case XML_PARSER_ENTITY_VALUE:
3404 /*
3405 * NOTE: in the case of entity values, we don't do the
3406 * substitution here since we need the literal
3407 * entity value to be able to save the internal
3408 * subset of the document.
3409 * This will be handled by xmlStringDecodeEntities
3410 */
3411 return;
3412 case XML_PARSER_ATTRIBUTE_VALUE:
3413 /*
3414 * NOTE: in the case of attributes values, we don't do the
3415 * substitution here unless we are in a mode where
3416 * the parser is explicitely asked to substitute
3417 * entities. The SAX callback is called with values
3418 * without entity substitution.
3419 * This will then be handled by xmlStringDecodeEntities
3420 */
3421 return;
3422 case XML_PARSER_ENTITY_DECL:
3423 /*
3424 * we just ignore it there
3425 * the substitution will be done once the entity is referenced
3426 */
3427 return;
3428 case XML_PARSER_DTD:
3429 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3431 ctxt->sax->error(ctxt->userData,
3432 "Entity references are forbiden in DTDs!\n");
3433 ctxt->wellFormed = 0;
3434 ctxt->disableSAX = 1;
3435 return;
3436 case XML_PARSER_IGNORE:
3437 return;
3438 }
3439
3440/* TODO: this seems not reached anymore .... Verify ... */
3441xmlGenericError(xmlGenericErrorContext,
3442 "Reached deprecated section in xmlParserHandleReference()\n");
3443xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003444 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003445xmlGenericError(xmlGenericErrorContext,
3446 "indicating the version: %s, thanks !\n", xmlParserVersion);
3447 NEXT;
3448 name = xmlScanName(ctxt);
3449 if (name == NULL) {
3450 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3452 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3453 ctxt->wellFormed = 0;
3454 ctxt->disableSAX = 1;
3455 ctxt->token = '&';
3456 return;
3457 }
3458 if (NXT(xmlStrlen(name)) != ';') {
3459 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3461 ctxt->sax->error(ctxt->userData,
3462 "Entity reference: ';' expected\n");
3463 ctxt->wellFormed = 0;
3464 ctxt->disableSAX = 1;
3465 ctxt->token = '&';
3466 xmlFree(name);
3467 return;
3468 }
3469 SKIP(xmlStrlen(name) + 1);
3470 if (ctxt->sax != NULL) {
3471 if (ctxt->sax->getEntity != NULL)
3472 ent = ctxt->sax->getEntity(ctxt->userData, name);
3473 }
3474
3475 /*
3476 * [ WFC: Entity Declared ]
3477 * the Name given in the entity reference must match that in an entity
3478 * declaration, except that well-formed documents need not declare any
3479 * of the following entities: amp, lt, gt, apos, quot.
3480 */
3481 if (ent == NULL)
3482 ent = xmlGetPredefinedEntity(name);
3483 if (ent == NULL) {
3484 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3486 ctxt->sax->error(ctxt->userData,
3487 "Entity reference: entity %s not declared\n",
3488 name);
3489 ctxt->wellFormed = 0;
3490 ctxt->disableSAX = 1;
3491 xmlFree(name);
3492 return;
3493 }
3494
3495 /*
3496 * [ WFC: Parsed Entity ]
3497 * An entity reference must not contain the name of an unparsed entity
3498 */
3499 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3500 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3502 ctxt->sax->error(ctxt->userData,
3503 "Entity reference to unparsed entity %s\n", name);
3504 ctxt->wellFormed = 0;
3505 ctxt->disableSAX = 1;
3506 }
3507
3508 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3509 ctxt->token = ent->content[0];
3510 xmlFree(name);
3511 return;
3512 }
3513 input = xmlNewEntityInputStream(ctxt, ent);
3514 xmlPushInput(ctxt, input);
3515 xmlFree(name);
3516#endif
3517 return;
3518}
3519
3520/**
3521 * xmlHandleEntity:
3522 * @ctxt: an XML parser context
3523 * @entity: an XML entity pointer.
3524 *
3525 * Default handling of defined entities, when should we define a new input
3526 * stream ? When do we just handle that as a set of chars ?
3527 *
3528 * OBSOLETE: to be removed at some point.
3529 */
3530
3531void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003532xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003533 static int deprecated = 0;
3534 if (!deprecated) {
3535 xmlGenericError(xmlGenericErrorContext,
3536 "xmlHandleEntity() deprecated function reached\n");
3537 deprecated = 1;
3538 }
3539
3540#if 0
3541 int len;
3542 xmlParserInputPtr input;
3543
3544 if (entity->content == NULL) {
3545 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3547 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3548 entity->name);
3549 ctxt->wellFormed = 0;
3550 ctxt->disableSAX = 1;
3551 return;
3552 }
3553 len = xmlStrlen(entity->content);
3554 if (len <= 2) goto handle_as_char;
3555
3556 /*
3557 * Redefine its content as an input stream.
3558 */
3559 input = xmlNewEntityInputStream(ctxt, entity);
3560 xmlPushInput(ctxt, input);
3561 return;
3562
3563handle_as_char:
3564 /*
3565 * Just handle the content as a set of chars.
3566 */
3567 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3568 (ctxt->sax->characters != NULL))
3569 ctxt->sax->characters(ctxt->userData, entity->content, len);
3570#endif
3571}
3572
3573/**
3574 * xmlNewGlobalNs:
3575 * @doc: the document carrying the namespace
3576 * @href: the URI associated
3577 * @prefix: the prefix for the namespace
3578 *
3579 * Creation of a Namespace, the old way using PI and without scoping
3580 * DEPRECATED !!!
3581 * It now create a namespace on the root element of the document if found.
3582 * Returns NULL this functionnality had been removed
3583 */
3584xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003585xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3586 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003587 static int deprecated = 0;
3588 if (!deprecated) {
3589 xmlGenericError(xmlGenericErrorContext,
3590 "xmlNewGlobalNs() deprecated function reached\n");
3591 deprecated = 1;
3592 }
3593 return(NULL);
3594#if 0
3595 xmlNodePtr root;
3596
3597 xmlNsPtr cur;
3598
3599 root = xmlDocGetRootElement(doc);
3600 if (root != NULL)
3601 return(xmlNewNs(root, href, prefix));
3602
3603 /*
3604 * if there is no root element yet, create an old Namespace type
3605 * and it will be moved to the root at save time.
3606 */
3607 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3608 if (cur == NULL) {
3609 xmlGenericError(xmlGenericErrorContext,
3610 "xmlNewGlobalNs : malloc failed\n");
3611 return(NULL);
3612 }
3613 memset(cur, 0, sizeof(xmlNs));
3614 cur->type = XML_GLOBAL_NAMESPACE;
3615
3616 if (href != NULL)
3617 cur->href = xmlStrdup(href);
3618 if (prefix != NULL)
3619 cur->prefix = xmlStrdup(prefix);
3620
3621 /*
3622 * Add it at the end to preserve parsing order ...
3623 */
3624 if (doc != NULL) {
3625 if (doc->oldNs == NULL) {
3626 doc->oldNs = cur;
3627 } else {
3628 xmlNsPtr prev = doc->oldNs;
3629
3630 while (prev->next != NULL) prev = prev->next;
3631 prev->next = cur;
3632 }
3633 }
3634
3635 return(NULL);
3636#endif
3637}
3638
3639/**
3640 * xmlUpgradeOldNs:
3641 * @doc: a document pointer
3642 *
3643 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3644 * DEPRECATED
3645 */
3646void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003647xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003648 static int deprecated = 0;
3649 if (!deprecated) {
3650 xmlGenericError(xmlGenericErrorContext,
3651 "xmlNewGlobalNs() deprecated function reached\n");
3652 deprecated = 1;
3653 }
3654#if 0
3655 xmlNsPtr cur;
3656
3657 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3658 if (doc->children == NULL) {
3659#ifdef DEBUG_TREE
3660 xmlGenericError(xmlGenericErrorContext,
3661 "xmlUpgradeOldNs: failed no root !\n");
3662#endif
3663 return;
3664 }
3665
3666 cur = doc->oldNs;
3667 while (cur->next != NULL) {
3668 cur->type = XML_LOCAL_NAMESPACE;
3669 cur = cur->next;
3670 }
3671 cur->type = XML_LOCAL_NAMESPACE;
3672 cur->next = doc->children->nsDef;
3673 doc->children->nsDef = doc->oldNs;
3674 doc->oldNs = NULL;
3675#endif
3676}
3677