blob: 7ab7812df3d8775adcbcf3c0c5d4d3d390fff125 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Owen Taylor3473f882001-02-23 17:55:21 +000053
Daniel Veillard56a4cb82001-03-24 17:00:36 +000054void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000055
/*
 * Library-wide default values for the parser options.
 * NOTE(review): presumably used to seed newly created parser contexts; the
 * code consuming these variables is outside this section - confirm.
 */

/* options that are switched on by default */
int xmlGetWarningsDefaultValue = 1;
int xmlKeepBlanksDefaultValue = 1;

/* options that are switched off by default */
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlLineNumbersDefaultValue = 0;

#ifdef VMS
/*
 * On VMS the two variables below are defined under a shortened name and the
 * full name is mapped onto it with a macro.
 * NOTE(review): presumably a workaround for a symbol length limit - confirm.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
73
Owen Taylor3473f882001-02-23 17:55:21 +000074/************************************************************************
75 * *
76 * Version and Features handling *
77 * *
78 ************************************************************************/
79const char *xmlParserVersion = LIBXML_VERSION_STRING;
80
Daniel Veillard5e2dace2001-07-18 19:30:27 +000081/**
Owen Taylor3473f882001-02-23 17:55:21 +000082 * xmlCheckVersion:
83 * @version: the include version number
84 *
85 * check the compiled lib version against the include one.
86 * This can warn or immediately kill the application
87 */
88void
89xmlCheckVersion(int version) {
90 int myversion = (int) LIBXML_VERSION;
91
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000092 xmlInitMemory();
93
Owen Taylor3473f882001-02-23 17:55:21 +000094 if ((myversion / 10000) != (version / 10000)) {
95 xmlGenericError(xmlGenericErrorContext,
96 "Fatal: program compiled against libxml %d using libxml %d\n",
97 (version / 10000), (myversion / 10000));
98 exit(1);
99 }
100 if ((myversion / 100) < (version / 100)) {
101 xmlGenericError(xmlGenericErrorContext,
102 "Warning: program compiled against libxml %d using older %d\n",
103 (version / 100), (myversion / 100));
104 }
105}
106
107
/*
 * Feature names reported by xmlGetFeaturesList(), in reporting order.
 */
static const char *xmlFeaturesList[] = {
    /* parser context settings and state */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result, out: number of names stored in @result
 * @result:  array receiving pointers to the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned. A *@len which is negative or greater
 * than or equal to 1000 is rejected.
 *
 * Returns -1 in case of error, or the total number of features;
 *         *@len is updated with the number of strings copied when it
 *         exceeded that total. The strings are static and must not be
 *         deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int count;
    int pos;

    /* no output location: the caller only asks for the total */
    if ((len == NULL) || (result == NULL))
        return(total);

    count = *len;
    if ((count < 0) || (count >= 1000))
        return(-1);

    /* never hand out more names than there are */
    if (count > total) {
        count = total;
        *len = total;
    }

    for (pos = 0; pos < count; pos++)
        result[pos] = xmlFeaturesList[pos];

    return(total);
}
179
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000180/**
Owen Taylor3473f882001-02-23 17:55:21 +0000181 * xmlGetFeature:
182 * @ctxt: an XML/HTML parser context
183 * @name: the feature name
184 * @result: location to store the result
185 *
186 * Read the current value of one feature of this parser instance
187 *
188 * Returns -1 in case or error, 0 otherwise
189 */
190int
191xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
192 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
193 return(-1);
194
195 if (!strcmp(name, "validate")) {
196 *((int *) result) = ctxt->validate;
197 } else if (!strcmp(name, "keep blanks")) {
198 *((int *) result) = ctxt->keepBlanks;
199 } else if (!strcmp(name, "disable SAX")) {
200 *((int *) result) = ctxt->disableSAX;
201 } else if (!strcmp(name, "fetch external entities")) {
202 *((int *) result) = ctxt->loadsubset;
203 } else if (!strcmp(name, "substitute entities")) {
204 *((int *) result) = ctxt->replaceEntities;
205 } else if (!strcmp(name, "gather line info")) {
206 *((int *) result) = ctxt->record_info;
207 } else if (!strcmp(name, "user data")) {
208 *((void **)result) = ctxt->userData;
209 } else if (!strcmp(name, "is html")) {
210 *((int *) result) = ctxt->html;
211 } else if (!strcmp(name, "is standalone")) {
212 *((int *) result) = ctxt->standalone;
213 } else if (!strcmp(name, "document")) {
214 *((xmlDocPtr *) result) = ctxt->myDoc;
215 } else if (!strcmp(name, "is well formed")) {
216 *((int *) result) = ctxt->wellFormed;
217 } else if (!strcmp(name, "is valid")) {
218 *((int *) result) = ctxt->valid;
219 } else if (!strcmp(name, "SAX block")) {
220 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
221 } else if (!strcmp(name, "SAX function internalSubset")) {
222 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
223 } else if (!strcmp(name, "SAX function isStandalone")) {
224 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
225 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
226 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
227 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
228 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
229 } else if (!strcmp(name, "SAX function resolveEntity")) {
230 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
231 } else if (!strcmp(name, "SAX function getEntity")) {
232 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
233 } else if (!strcmp(name, "SAX function entityDecl")) {
234 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
235 } else if (!strcmp(name, "SAX function notationDecl")) {
236 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
237 } else if (!strcmp(name, "SAX function attributeDecl")) {
238 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
239 } else if (!strcmp(name, "SAX function elementDecl")) {
240 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
241 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
242 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
243 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
244 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
245 } else if (!strcmp(name, "SAX function startDocument")) {
246 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
247 } else if (!strcmp(name, "SAX function endDocument")) {
248 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
249 } else if (!strcmp(name, "SAX function startElement")) {
250 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
251 } else if (!strcmp(name, "SAX function endElement")) {
252 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
253 } else if (!strcmp(name, "SAX function reference")) {
254 *((referenceSAXFunc *) result) = ctxt->sax->reference;
255 } else if (!strcmp(name, "SAX function characters")) {
256 *((charactersSAXFunc *) result) = ctxt->sax->characters;
257 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
258 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
259 } else if (!strcmp(name, "SAX function processingInstruction")) {
260 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
261 } else if (!strcmp(name, "SAX function comment")) {
262 *((commentSAXFunc *) result) = ctxt->sax->comment;
263 } else if (!strcmp(name, "SAX function warning")) {
264 *((warningSAXFunc *) result) = ctxt->sax->warning;
265 } else if (!strcmp(name, "SAX function error")) {
266 *((errorSAXFunc *) result) = ctxt->sax->error;
267 } else if (!strcmp(name, "SAX function fatalError")) {
268 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
269 } else if (!strcmp(name, "SAX function getParameterEntity")) {
270 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
271 } else if (!strcmp(name, "SAX function cdataBlock")) {
272 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
273 } else if (!strcmp(name, "SAX function externalSubset")) {
274 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
275 } else {
276 return(-1);
277 }
278 return(0);
279}
280
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000281/**
Owen Taylor3473f882001-02-23 17:55:21 +0000282 * xmlSetFeature:
283 * @ctxt: an XML/HTML parser context
284 * @name: the feature name
285 * @value: pointer to the location of the new value
286 *
287 * Change the current value of one feature of this parser instance
288 *
289 * Returns -1 in case or error, 0 otherwise
290 */
291int
292xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
293 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
294 return(-1);
295
296 if (!strcmp(name, "validate")) {
297 int newvalidate = *((int *) value);
298 if ((!ctxt->validate) && (newvalidate != 0)) {
299 if (ctxt->vctxt.warning == NULL)
300 ctxt->vctxt.warning = xmlParserValidityWarning;
301 if (ctxt->vctxt.error == NULL)
302 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000303 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000304 }
305 ctxt->validate = newvalidate;
306 } else if (!strcmp(name, "keep blanks")) {
307 ctxt->keepBlanks = *((int *) value);
308 } else if (!strcmp(name, "disable SAX")) {
309 ctxt->disableSAX = *((int *) value);
310 } else if (!strcmp(name, "fetch external entities")) {
311 ctxt->loadsubset = *((int *) value);
312 } else if (!strcmp(name, "substitute entities")) {
313 ctxt->replaceEntities = *((int *) value);
314 } else if (!strcmp(name, "gather line info")) {
315 ctxt->record_info = *((int *) value);
316 } else if (!strcmp(name, "user data")) {
317 ctxt->userData = *((void **)value);
318 } else if (!strcmp(name, "is html")) {
319 ctxt->html = *((int *) value);
320 } else if (!strcmp(name, "is standalone")) {
321 ctxt->standalone = *((int *) value);
322 } else if (!strcmp(name, "document")) {
323 ctxt->myDoc = *((xmlDocPtr *) value);
324 } else if (!strcmp(name, "is well formed")) {
325 ctxt->wellFormed = *((int *) value);
326 } else if (!strcmp(name, "is valid")) {
327 ctxt->valid = *((int *) value);
328 } else if (!strcmp(name, "SAX block")) {
329 ctxt->sax = *((xmlSAXHandlerPtr *) value);
330 } else if (!strcmp(name, "SAX function internalSubset")) {
331 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function isStandalone")) {
333 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
335 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
337 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function resolveEntity")) {
339 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
340 } else if (!strcmp(name, "SAX function getEntity")) {
341 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
342 } else if (!strcmp(name, "SAX function entityDecl")) {
343 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function notationDecl")) {
345 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function attributeDecl")) {
347 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function elementDecl")) {
349 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
351 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
353 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function startDocument")) {
355 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function endDocument")) {
357 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function startElement")) {
359 ctxt->sax->startElement = *((startElementSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function endElement")) {
361 ctxt->sax->endElement = *((endElementSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function reference")) {
363 ctxt->sax->reference = *((referenceSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function characters")) {
365 ctxt->sax->characters = *((charactersSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
367 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
368 } else if (!strcmp(name, "SAX function processingInstruction")) {
369 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
370 } else if (!strcmp(name, "SAX function comment")) {
371 ctxt->sax->comment = *((commentSAXFunc *) value);
372 } else if (!strcmp(name, "SAX function warning")) {
373 ctxt->sax->warning = *((warningSAXFunc *) value);
374 } else if (!strcmp(name, "SAX function error")) {
375 ctxt->sax->error = *((errorSAXFunc *) value);
376 } else if (!strcmp(name, "SAX function fatalError")) {
377 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
378 } else if (!strcmp(name, "SAX function getParameterEntity")) {
379 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
380 } else if (!strcmp(name, "SAX function cdataBlock")) {
381 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
382 } else if (!strcmp(name, "SAX function externalSubset")) {
383 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
384 } else {
385 return(-1);
386 }
387 return(0);
388}
389
390/************************************************************************
391 * *
392 * Some functions to avoid too large macros *
393 * *
394 ************************************************************************/
395
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE and
 * FFFF. Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF are excluded, the supplementary planes are legal */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
416
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* horizontal tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
431
/*
 * Direct lookup table for the code points 0x00 - 0xFF: an entry is 1 when
 * the code point is a BaseChar (A-Z, a-z, 0xC0-0xD6, 0xD8-0xF6, 0xF8-0xFF).
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray lookup
 * table; negative values are never a BaseChar and are rejected before the
 * table is indexed. Higher code points are tested against the range list
 * of the REC, split at 0x0905 and 0x10A0 so that only the ranges which can
 * possibly match are evaluated.
 *
 * The range tests were generated from the REC with:
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* a negative value must not be used as an index in xmlBaseArray */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* 0x0100 - 0x0904 */
    if (c < 0x0905)
        return(
            (((c) >= 0x0100) && ((c) <= 0x0131)) ||
            (((c) >= 0x0134) && ((c) <= 0x013E)) ||
            (((c) >= 0x0141) && ((c) <= 0x0148)) ||
            (((c) >= 0x014A) && ((c) <= 0x017E)) ||
            (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
            (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
            (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
            (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
            (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
            (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
            ((c) == 0x0386) ||
            (((c) >= 0x0388) && ((c) <= 0x038A)) ||
            ((c) == 0x038C) ||
            (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
            (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
            (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
            ((c) == 0x03DA) ||
            ((c) == 0x03DC) ||
            ((c) == 0x03DE) ||
            ((c) == 0x03E0) ||
            (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
            (((c) >= 0x0401) && ((c) <= 0x040C)) ||
            (((c) >= 0x040E) && ((c) <= 0x044F)) ||
            (((c) >= 0x0451) && ((c) <= 0x045C)) ||
            (((c) >= 0x045E) && ((c) <= 0x0481)) ||
            (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
            (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
            (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
            (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
            (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
            (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
            (((c) >= 0x0531) && ((c) <= 0x0556)) ||
            ((c) == 0x0559) ||
            (((c) >= 0x0561) && ((c) <= 0x0586)) ||
            (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
            (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
            (((c) >= 0x0621) && ((c) <= 0x063A)) ||
            (((c) >= 0x0641) && ((c) <= 0x064A)) ||
            (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
            (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
            (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
            (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
            ((c) == 0x06D5) ||
            (((c) >= 0x06E5) && ((c) <= 0x06E6)));

    /* 0x0905 - 0x109F */
    if (c < 0x10A0)
        return(
            (((c) >= 0x0905) && ((c) <= 0x0939)) ||
            ((c) == 0x093D) ||
            (((c) >= 0x0958) && ((c) <= 0x0961)) ||
            (((c) >= 0x0985) && ((c) <= 0x098C)) ||
            (((c) >= 0x098F) && ((c) <= 0x0990)) ||
            (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
            (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
            ((c) == 0x09B2) ||
            (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
            (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
            (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
            (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
            (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
            (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
            (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
            (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
            (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
            (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
            (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
            (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
            ((c) == 0x0A5E) ||
            (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
            (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
            ((c) == 0x0A8D) ||
            (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
            (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
            (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
            (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
            (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
            ((c) == 0x0ABD) ||
            ((c) == 0x0AE0) ||
            (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
            (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
            (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
            (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
            (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
            (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
            ((c) == 0x0B3D) ||
            (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
            (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
            (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
            (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
            (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
            (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
            ((c) == 0x0B9C) ||
            (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
            (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
            (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
            (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
            (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
            (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
            (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
            (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
            (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
            (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
            (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
            (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
            (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
            (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
            (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
            (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
            ((c) == 0x0CDE) ||
            (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
            (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
            (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
            (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
            (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
            (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
            (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
            ((c) == 0x0E30) ||
            (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
            (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
            (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
            ((c) == 0x0E84) ||
            (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
            ((c) == 0x0E8A) ||
            ((c) == 0x0E8D) ||
            (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
            (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
            (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
            ((c) == 0x0EA5) ||
            ((c) == 0x0EA7) ||
            (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
            (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
            ((c) == 0x0EB0) ||
            (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
            ((c) == 0x0EBD) ||
            (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
            (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
            (((c) >= 0x0F49) && ((c) <= 0x0F69)));

    /* 0x10A0 and above */
    return(
        (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
        (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
        ((c) == 0x1100) ||
        (((c) >= 0x1102) && ((c) <= 0x1103)) ||
        (((c) >= 0x1105) && ((c) <= 0x1107)) ||
        ((c) == 0x1109) ||
        (((c) >= 0x110B) && ((c) <= 0x110C)) ||
        (((c) >= 0x110E) && ((c) <= 0x1112)) ||
        ((c) == 0x113C) ||
        ((c) == 0x113E) ||
        ((c) == 0x1140) ||
        ((c) == 0x114C) ||
        ((c) == 0x114E) ||
        ((c) == 0x1150) ||
        (((c) >= 0x1154) && ((c) <= 0x1155)) ||
        ((c) == 0x1159) ||
        (((c) >= 0x115F) && ((c) <= 0x1161)) ||
        ((c) == 0x1163) ||
        ((c) == 0x1165) ||
        ((c) == 0x1167) ||
        ((c) == 0x1169) ||
        (((c) >= 0x116D) && ((c) <= 0x116E)) ||
        (((c) >= 0x1172) && ((c) <= 0x1173)) ||
        ((c) == 0x1175) ||
        ((c) == 0x119E) ||
        ((c) == 0x11A8) ||
        ((c) == 0x11AB) ||
        (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
        (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
        ((c) == 0x11BA) ||
        (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
        ((c) == 0x11EB) ||
        ((c) == 0x11F0) ||
        ((c) == 0x11F9) ||
        (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
        (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
        (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
        (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
        (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
        (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
        (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
        ((c) == 0x1F59) ||
        ((c) == 0x1F5B) ||
        ((c) == 0x1F5D) ||
        (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
        (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
        (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
        ((c) == 0x1FBE) ||
        (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
        (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
        (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
        (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
        (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
        (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
        (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
        ((c) == 0x2126) ||
        (((c) >= 0x212A) && ((c) <= 0x212B)) ||
        ((c) == 0x212E) ||
        (((c) >= 0x2180) && ((c) <= 0x2182)) ||
        (((c) >= 0x3041) && ((c) <= 0x3094)) ||
        (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
        (((c) >= 0x3105) && ((c) <= 0x312C)) ||
        (((c) >= 0xAC00) && ((c) <= 0xD7A3)));
}
670
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] digit ranges, in ascending order */
    static const int digitRanges[][2] = {
        { 0x0030, 0x0039 },
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 },
    };
    const unsigned int nbRanges =
        (unsigned int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    unsigned int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* the table is sorted: once below a range start nothing can match */
        if (c < digitRanges[idx][0])
            return(0);
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
700
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the code point is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The range list is ascending and is cut at 0x0901, 0x0A02 and 0x0E31 so
 * that only the ranges which can possibly contain @c are evaluated.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* nothing below 0x0300 is a combining character */
    if (c < 0x0300)
        return(0);

    /* 0x0300 - 0x0900 */
    if (c < 0x0901)
        return(
            ((c >= 0x0300) && (c <= 0x0345)) ||
            ((c >= 0x0360) && (c <= 0x0361)) ||
            ((c >= 0x0483) && (c <= 0x0486)) ||
            ((c >= 0x0591) && (c <= 0x05A1)) ||
            ((c >= 0x05A3) && (c <= 0x05B9)) ||
            ((c >= 0x05BB) && (c <= 0x05BD)) ||
            (c == 0x05BF) ||
            ((c >= 0x05C1) && (c <= 0x05C2)) ||
            (c == 0x05C4) ||
            ((c >= 0x064B) && (c <= 0x0652)) ||
            (c == 0x0670) ||
            ((c >= 0x06D6) && (c <= 0x06DC)) ||
            ((c >= 0x06DD) && (c <= 0x06DF)) ||
            ((c >= 0x06E0) && (c <= 0x06E4)) ||
            ((c >= 0x06E7) && (c <= 0x06E8)) ||
            ((c >= 0x06EA) && (c <= 0x06ED)));

    /* 0x0901 - 0x0A01 */
    if (c < 0x0A02)
        return(
            ((c >= 0x0901) && (c <= 0x0903)) ||
            (c == 0x093C) ||
            ((c >= 0x093E) && (c <= 0x094C)) ||
            (c == 0x094D) ||
            ((c >= 0x0951) && (c <= 0x0954)) ||
            ((c >= 0x0962) && (c <= 0x0963)) ||
            ((c >= 0x0981) && (c <= 0x0983)) ||
            (c == 0x09BC) ||
            (c == 0x09BE) ||
            (c == 0x09BF) ||
            ((c >= 0x09C0) && (c <= 0x09C4)) ||
            ((c >= 0x09C7) && (c <= 0x09C8)) ||
            ((c >= 0x09CB) && (c <= 0x09CD)) ||
            (c == 0x09D7) ||
            ((c >= 0x09E2) && (c <= 0x09E3)));

    /* 0x0A02 - 0x0E30 */
    if (c < 0x0E31)
        return(
            (c == 0x0A02) ||
            (c == 0x0A3C) ||
            (c == 0x0A3E) ||
            (c == 0x0A3F) ||
            ((c >= 0x0A40) && (c <= 0x0A42)) ||
            ((c >= 0x0A47) && (c <= 0x0A48)) ||
            ((c >= 0x0A4B) && (c <= 0x0A4D)) ||
            ((c >= 0x0A70) && (c <= 0x0A71)) ||
            ((c >= 0x0A81) && (c <= 0x0A83)) ||
            (c == 0x0ABC) ||
            ((c >= 0x0ABE) && (c <= 0x0AC5)) ||
            ((c >= 0x0AC7) && (c <= 0x0AC9)) ||
            ((c >= 0x0ACB) && (c <= 0x0ACD)) ||
            ((c >= 0x0B01) && (c <= 0x0B03)) ||
            (c == 0x0B3C) ||
            ((c >= 0x0B3E) && (c <= 0x0B43)) ||
            ((c >= 0x0B47) && (c <= 0x0B48)) ||
            ((c >= 0x0B4B) && (c <= 0x0B4D)) ||
            ((c >= 0x0B56) && (c <= 0x0B57)) ||
            ((c >= 0x0B82) && (c <= 0x0B83)) ||
            ((c >= 0x0BBE) && (c <= 0x0BC2)) ||
            ((c >= 0x0BC6) && (c <= 0x0BC8)) ||
            ((c >= 0x0BCA) && (c <= 0x0BCD)) ||
            (c == 0x0BD7) ||
            ((c >= 0x0C01) && (c <= 0x0C03)) ||
            ((c >= 0x0C3E) && (c <= 0x0C44)) ||
            ((c >= 0x0C46) && (c <= 0x0C48)) ||
            ((c >= 0x0C4A) && (c <= 0x0C4D)) ||
            ((c >= 0x0C55) && (c <= 0x0C56)) ||
            ((c >= 0x0C82) && (c <= 0x0C83)) ||
            ((c >= 0x0CBE) && (c <= 0x0CC4)) ||
            ((c >= 0x0CC6) && (c <= 0x0CC8)) ||
            ((c >= 0x0CCA) && (c <= 0x0CCD)) ||
            ((c >= 0x0CD5) && (c <= 0x0CD6)) ||
            ((c >= 0x0D02) && (c <= 0x0D03)) ||
            ((c >= 0x0D3E) && (c <= 0x0D43)) ||
            ((c >= 0x0D46) && (c <= 0x0D48)) ||
            ((c >= 0x0D4A) && (c <= 0x0D4D)) ||
            (c == 0x0D57));

    /* 0x0E31 and above */
    return(
        (c == 0x0E31) ||
        ((c >= 0x0E34) && (c <= 0x0E3A)) ||
        ((c >= 0x0E47) && (c <= 0x0E4E)) ||
        (c == 0x0EB1) ||
        ((c >= 0x0EB4) && (c <= 0x0EB9)) ||
        ((c >= 0x0EBB) && (c <= 0x0EBC)) ||
        ((c >= 0x0EC8) && (c <= 0x0ECD)) ||
        ((c >= 0x0F18) && (c <= 0x0F19)) ||
        (c == 0x0F35) ||
        (c == 0x0F37) ||
        (c == 0x0F39) ||
        (c == 0x0F3E) ||
        (c == 0x0F3F) ||
        ((c >= 0x0F71) && (c <= 0x0F84)) ||
        ((c >= 0x0F86) && (c <= 0x0F8B)) ||
        ((c >= 0x0F90) && (c <= 0x0F95)) ||
        (c == 0x0F97) ||
        ((c >= 0x0F99) && (c <= 0x0FAD)) ||
        ((c >= 0x0FB1) && (c <= 0x0FB7)) ||
        (c == 0x0FB9) ||
        ((c >= 0x20D0) && (c <= 0x20DC)) ||
        (c == 0x20E1) ||
        ((c >= 0x302A) && (c <= 0x302F)) ||
        (c == 0x3099) ||
        (c == 0x309A));
}
813
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE]: all three code points of the range are listed */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
838
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Note that, in addition to the ranges of the production, this
 * implementation also accepts [#xF900-#xFA2D].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x0100 can match, get rid of that case first */
    if (c < 0x0100)
        return(0);

    if (c == 0x3007)
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    if ((c >= 0x4e00) && (c <= 0x9fa5))
        return(1);
    if ((c >= 0xf900) && (c <= 0xfa2d))
        return(1);

    return(0);
}
856
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The BaseChar test is tried first, the Ideographic one is only
 * evaluated when it fails.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
870
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
    /* #x20 | #xD | #xA */
    case 0x20: case 0x0D: case 0x0A:
    /* [-'()+,./:=?;!*#@$_%] */
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
893
894/************************************************************************
895 * *
896 * Input handling functions for progressive parsing *
897 * *
898 ************************************************************************/
899
900/* #define DEBUG_INPUT */
901/* #define DEBUG_STACK */
902/* #define DEBUG_PUSH */
903
904
905/* we need to keep enough input to show errors in context */
906#define LINE_LEN 80
907
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to check
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the base pointer of the input
 * differs from the content of its buffer, or when the cursor lies
 * outside of [base, base + use]; then dumps the current state.
 * Pointers are printed with %p so that they are not truncated on
 * platforms where a pointer is wider than an int.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
/* no checking when DEBUG_INPUT is not defined */
#define CHECK_BUFFER(in)
#endif
932
933
934/**
935 * xmlParserInputRead:
936 * @in: an XML parser input
937 * @len: an indicative size for the lookahead
938 *
939 * This function refresh the input for the parser. It doesn't try to
940 * preserve pointers to the input buffer, and discard already read data
941 *
942 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
943 * end of this entity
944 */
945int
946xmlParserInputRead(xmlParserInputPtr in, int len) {
947 int ret;
948 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000949 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000950
951#ifdef DEBUG_INPUT
952 xmlGenericError(xmlGenericErrorContext, "Read\n");
953#endif
954 if (in->buf == NULL) return(-1);
955 if (in->base == NULL) return(-1);
956 if (in->cur == NULL) return(-1);
957 if (in->buf->buffer == NULL) return(-1);
958 if (in->buf->readcallback == NULL) return(-1);
959
960 CHECK_BUFFER(in);
961
962 used = in->cur - in->buf->buffer->content;
963 ret = xmlBufferShrink(in->buf->buffer, used);
964 if (ret > 0) {
965 in->cur -= ret;
966 in->consumed += ret;
967 }
968 ret = xmlParserInputBufferRead(in->buf, len);
969 if (in->base != in->buf->buffer->content) {
970 /*
971 * the buffer has been realloced
972 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000973 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000974 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000975 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000976 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000977 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000978
979 CHECK_BUFFER(in);
980
981 return(ret);
982}
983
984/**
985 * xmlParserInputGrow:
986 * @in: an XML parser input
987 * @len: an indicative size for the lookahead
988 *
989 * This function increase the input for the parser. It tries to
990 * preserve pointers to the input buffer, and keep already read data
991 *
992 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
993 * end of this entity
994 */
995int
996xmlParserInputGrow(xmlParserInputPtr in, int len) {
997 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000998 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000999
1000#ifdef DEBUG_INPUT
1001 xmlGenericError(xmlGenericErrorContext, "Grow\n");
1002#endif
1003 if (in->buf == NULL) return(-1);
1004 if (in->base == NULL) return(-1);
1005 if (in->cur == NULL) return(-1);
1006 if (in->buf->buffer == NULL) return(-1);
1007
1008 CHECK_BUFFER(in);
1009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010 indx = in->cur - in->base;
1011 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001012
1013 CHECK_BUFFER(in);
1014
1015 return(0);
1016 }
1017 if (in->buf->readcallback != NULL)
1018 ret = xmlParserInputBufferGrow(in->buf, len);
1019 else
1020 return(0);
1021
1022 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001023 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001024 * block, but we use it really as an integer to do some
1025 * pointer arithmetic. Insure will raise it as a bug but in
1026 * that specific case, that's not !
1027 */
1028 if (in->base != in->buf->buffer->content) {
1029 /*
1030 * the buffer has been realloced
1031 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001032 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001033 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001034 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001035 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001036 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001037
1038 CHECK_BUFFER(in);
1039
1040 return(ret);
1041}
1042
1043/**
1044 * xmlParserInputShrink:
1045 * @in: an XML parser input
1046 *
1047 * This function removes used input for the parser.
1048 */
1049void
1050xmlParserInputShrink(xmlParserInputPtr in) {
1051 int used;
1052 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001053 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001054
1055#ifdef DEBUG_INPUT
1056 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1057#endif
1058 if (in->buf == NULL) return;
1059 if (in->base == NULL) return;
1060 if (in->cur == NULL) return;
1061 if (in->buf->buffer == NULL) return;
1062
1063 CHECK_BUFFER(in);
1064
1065 used = in->cur - in->buf->buffer->content;
1066 /*
1067 * Do not shrink on large buffers whose only a tiny fraction
1068 * was consumned
1069 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001070 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001071 return;
1072 if (used > INPUT_CHUNK) {
1073 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1074 if (ret > 0) {
1075 in->cur -= ret;
1076 in->consumed += ret;
1077 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001078 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001079 }
1080
1081 CHECK_BUFFER(in);
1082
1083 if (in->buf->buffer->use > INPUT_CHUNK) {
1084 return;
1085 }
1086 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1087 if (in->base != in->buf->buffer->content) {
1088 /*
1089 * the buffer has been realloced
1090 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001091 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001092 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001093 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001094 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001095 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001096
1097 CHECK_BUFFER(in);
1098}
1099
1100/************************************************************************
1101 * *
1102 * UTF8 character input and related functions *
1103 * *
1104 ************************************************************************/
1105
1106/**
1107 * xmlNextChar:
1108 * @ctxt: the XML parser context
1109 *
1110 * Skip to the next char input char.
1111 */
1112
1113void
1114xmlNextChar(xmlParserCtxtPtr ctxt) {
1115 if (ctxt->instate == XML_PARSER_EOF)
1116 return;
1117
1118 /*
1119 * 2.11 End-of-Line Handling
1120 * the literal two-character sequence "#xD#xA" or a standalone
1121 * literal #xD, an XML processor must pass to the application
1122 * the single character #xA.
1123 */
1124 if (ctxt->token != 0) ctxt->token = 0;
1125 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1126 if ((*ctxt->input->cur == 0) &&
1127 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1128 (ctxt->instate != XML_PARSER_COMMENT)) {
1129 /*
1130 * If we are at the end of the current entity and
1131 * the context allows it, we pop consumed entities
1132 * automatically.
1133 * the auto closing should be blocked in other cases
1134 */
1135 xmlPopInput(ctxt);
1136 } else {
1137 if (*(ctxt->input->cur) == '\n') {
1138 ctxt->input->line++; ctxt->input->col = 1;
1139 } else ctxt->input->col++;
1140 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1141 /*
1142 * We are supposed to handle UTF8, check it's valid
1143 * From rfc2044: encoding of the Unicode values on UTF-8:
1144 *
1145 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1146 * 0000 0000-0000 007F 0xxxxxxx
1147 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1148 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1149 *
1150 * Check for the 0x110000 limit too
1151 */
1152 const unsigned char *cur = ctxt->input->cur;
1153 unsigned char c;
1154
1155 c = *cur;
1156 if (c & 0x80) {
1157 if (cur[1] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if ((cur[1] & 0xc0) != 0x80)
1160 goto encoding_error;
1161 if ((c & 0xe0) == 0xe0) {
1162 unsigned int val;
1163
1164 if (cur[2] == 0)
1165 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1166 if ((cur[2] & 0xc0) != 0x80)
1167 goto encoding_error;
1168 if ((c & 0xf0) == 0xf0) {
1169 if (cur[3] == 0)
1170 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1171 if (((c & 0xf8) != 0xf0) ||
1172 ((cur[3] & 0xc0) != 0x80))
1173 goto encoding_error;
1174 /* 4-byte code */
1175 ctxt->input->cur += 4;
1176 val = (cur[0] & 0x7) << 18;
1177 val |= (cur[1] & 0x3f) << 12;
1178 val |= (cur[2] & 0x3f) << 6;
1179 val |= cur[3] & 0x3f;
1180 } else {
1181 /* 3-byte code */
1182 ctxt->input->cur += 3;
1183 val = (cur[0] & 0xf) << 12;
1184 val |= (cur[1] & 0x3f) << 6;
1185 val |= cur[2] & 0x3f;
1186 }
1187 if (((val > 0xd7ff) && (val < 0xe000)) ||
1188 ((val > 0xfffd) && (val < 0x10000)) ||
1189 (val >= 0x110000)) {
1190 if ((ctxt->sax != NULL) &&
1191 (ctxt->sax->error != NULL))
1192 ctxt->sax->error(ctxt->userData,
1193 "Char 0x%X out of allowed range\n", val);
1194 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1195 ctxt->wellFormed = 0;
1196 ctxt->disableSAX = 1;
1197 }
1198 } else
1199 /* 2-byte code */
1200 ctxt->input->cur += 2;
1201 } else
1202 /* 1-byte code */
1203 ctxt->input->cur++;
1204 } else {
1205 /*
1206 * Assume it's a fixed lenght encoding (1) with
1207 * a compatibke encoding for the ASCII set, since
1208 * XML constructs only use < 128 chars
1209 */
1210 ctxt->input->cur++;
1211 }
1212 ctxt->nbChars++;
1213 if (*ctxt->input->cur == 0)
1214 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1215 }
1216 } else {
1217 ctxt->input->cur++;
1218 ctxt->nbChars++;
1219 if (*ctxt->input->cur == 0)
1220 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1221 }
1222 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1223 xmlParserHandlePEReference(ctxt);
1224 if ((*ctxt->input->cur == 0) &&
1225 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1226 xmlPopInput(ctxt);
1227 return;
1228encoding_error:
1229 /*
1230 * If we detect an UTF8 error that probably mean that the
1231 * input encoding didn't get properly advertized in the
1232 * declaration header. Report the error and switch the encoding
1233 * to ISO-Latin-1 (if you don't like this policy, just declare the
1234 * encoding !)
1235 */
1236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1237 ctxt->sax->error(ctxt->userData,
1238 "Input is not proper UTF-8, indicate encoding !\n");
1239 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1240 ctxt->input->cur[0], ctxt->input->cur[1],
1241 ctxt->input->cur[2], ctxt->input->cur[3]);
1242 }
1243 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1244
1245 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1246 ctxt->input->cur++;
1247 return;
1248}
1249
1250/**
1251 * xmlCurrentChar:
1252 * @ctxt: the XML parser context
1253 * @len: pointer to the length of the char read
1254 *
1255 * The current char value, if using UTF-8 this may actaully span multiple
1256 * bytes in the input buffer. Implement the end of line normalization:
1257 * 2.11 End-of-Line Handling
1258 * Wherever an external parsed entity or the literal entity value
1259 * of an internal parsed entity contains either the literal two-character
1260 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1261 * must pass to the application the single character #xA.
1262 * This behavior can conveniently be produced by normalizing all
1263 * line breaks to #xA on input, before parsing.)
1264 *
1265 * Returns the current char value and its lenght
1266 */
1267
1268int
1269xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1270 if (ctxt->instate == XML_PARSER_EOF)
1271 return(0);
1272
1273 if (ctxt->token != 0) {
1274 *len = 0;
1275 return(ctxt->token);
1276 }
1277 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1278 *len = 1;
1279 return((int) *ctxt->input->cur);
1280 }
1281 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1282 /*
1283 * We are supposed to handle UTF8, check it's valid
1284 * From rfc2044: encoding of the Unicode values on UTF-8:
1285 *
1286 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1287 * 0000 0000-0000 007F 0xxxxxxx
1288 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1289 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1290 *
1291 * Check for the 0x110000 limit too
1292 */
1293 const unsigned char *cur = ctxt->input->cur;
1294 unsigned char c;
1295 unsigned int val;
1296
1297 c = *cur;
1298 if (c & 0x80) {
1299 if (cur[1] == 0)
1300 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1301 if ((cur[1] & 0xc0) != 0x80)
1302 goto encoding_error;
1303 if ((c & 0xe0) == 0xe0) {
1304
1305 if (cur[2] == 0)
1306 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1307 if ((cur[2] & 0xc0) != 0x80)
1308 goto encoding_error;
1309 if ((c & 0xf0) == 0xf0) {
1310 if (cur[3] == 0)
1311 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1312 if (((c & 0xf8) != 0xf0) ||
1313 ((cur[3] & 0xc0) != 0x80))
1314 goto encoding_error;
1315 /* 4-byte code */
1316 *len = 4;
1317 val = (cur[0] & 0x7) << 18;
1318 val |= (cur[1] & 0x3f) << 12;
1319 val |= (cur[2] & 0x3f) << 6;
1320 val |= cur[3] & 0x3f;
1321 } else {
1322 /* 3-byte code */
1323 *len = 3;
1324 val = (cur[0] & 0xf) << 12;
1325 val |= (cur[1] & 0x3f) << 6;
1326 val |= cur[2] & 0x3f;
1327 }
1328 } else {
1329 /* 2-byte code */
1330 *len = 2;
1331 val = (cur[0] & 0x1f) << 6;
1332 val |= cur[1] & 0x3f;
1333 }
1334 if (!IS_CHAR(val)) {
1335 if ((ctxt->sax != NULL) &&
1336 (ctxt->sax->error != NULL))
1337 ctxt->sax->error(ctxt->userData,
1338 "Char 0x%X out of allowed range\n", val);
1339 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1340 ctxt->wellFormed = 0;
1341 ctxt->disableSAX = 1;
1342 }
1343 return(val);
1344 } else {
1345 /* 1-byte code */
1346 *len = 1;
1347 if (*ctxt->input->cur == 0xD) {
1348 if (ctxt->input->cur[1] == 0xA) {
1349 ctxt->nbChars++;
1350 ctxt->input->cur++;
1351 }
1352 return(0xA);
1353 }
1354 return((int) *ctxt->input->cur);
1355 }
1356 }
1357 /*
1358 * Assume it's a fixed lenght encoding (1) with
1359 * a compatibke encoding for the ASCII set, since
1360 * XML constructs only use < 128 chars
1361 */
1362 *len = 1;
1363 if (*ctxt->input->cur == 0xD) {
1364 if (ctxt->input->cur[1] == 0xA) {
1365 ctxt->nbChars++;
1366 ctxt->input->cur++;
1367 }
1368 return(0xA);
1369 }
1370 return((int) *ctxt->input->cur);
1371encoding_error:
1372 /*
1373 * If we detect an UTF8 error that probably mean that the
1374 * input encoding didn't get properly advertized in the
1375 * declaration header. Report the error and switch the encoding
1376 * to ISO-Latin-1 (if you don't like this policy, just declare the
1377 * encoding !)
1378 */
1379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1380 ctxt->sax->error(ctxt->userData,
1381 "Input is not proper UTF-8, indicate encoding !\n");
1382 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1383 ctxt->input->cur[0], ctxt->input->cur[1],
1384 ctxt->input->cur[2], ctxt->input->cur[3]);
1385 }
1386 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1387
1388 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1389 *len = 1;
1390 return((int) *ctxt->input->cur);
1391}
1392
1393/**
1394 * xmlStringCurrentChar:
1395 * @ctxt: the XML parser context
1396 * @cur: pointer to the beginning of the char
1397 * @len: pointer to the length of the char read
1398 *
1399 * The current char value, if using UTF-8 this may actaully span multiple
1400 * bytes in the input buffer.
1401 *
1402 * Returns the current char value and its lenght
1403 */
1404
1405int
1406xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001407 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001408 /*
1409 * We are supposed to handle UTF8, check it's valid
1410 * From rfc2044: encoding of the Unicode values on UTF-8:
1411 *
1412 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1413 * 0000 0000-0000 007F 0xxxxxxx
1414 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1415 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1416 *
1417 * Check for the 0x110000 limit too
1418 */
1419 unsigned char c;
1420 unsigned int val;
1421
1422 c = *cur;
1423 if (c & 0x80) {
1424 if ((cur[1] & 0xc0) != 0x80)
1425 goto encoding_error;
1426 if ((c & 0xe0) == 0xe0) {
1427
1428 if ((cur[2] & 0xc0) != 0x80)
1429 goto encoding_error;
1430 if ((c & 0xf0) == 0xf0) {
1431 if (((c & 0xf8) != 0xf0) ||
1432 ((cur[3] & 0xc0) != 0x80))
1433 goto encoding_error;
1434 /* 4-byte code */
1435 *len = 4;
1436 val = (cur[0] & 0x7) << 18;
1437 val |= (cur[1] & 0x3f) << 12;
1438 val |= (cur[2] & 0x3f) << 6;
1439 val |= cur[3] & 0x3f;
1440 } else {
1441 /* 3-byte code */
1442 *len = 3;
1443 val = (cur[0] & 0xf) << 12;
1444 val |= (cur[1] & 0x3f) << 6;
1445 val |= cur[2] & 0x3f;
1446 }
1447 } else {
1448 /* 2-byte code */
1449 *len = 2;
1450 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001451 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001452 }
1453 if (!IS_CHAR(val)) {
1454 if ((ctxt->sax != NULL) &&
1455 (ctxt->sax->error != NULL))
1456 ctxt->sax->error(ctxt->userData,
1457 "Char 0x%X out of allowed range\n", val);
1458 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1459 ctxt->wellFormed = 0;
1460 ctxt->disableSAX = 1;
1461 }
1462 return(val);
1463 } else {
1464 /* 1-byte code */
1465 *len = 1;
1466 return((int) *cur);
1467 }
1468 }
1469 /*
1470 * Assume it's a fixed lenght encoding (1) with
1471 * a compatibke encoding for the ASCII set, since
1472 * XML constructs only use < 128 chars
1473 */
1474 *len = 1;
1475 return((int) *cur);
1476encoding_error:
1477 /*
1478 * If we detect an UTF8 error that probably mean that the
1479 * input encoding didn't get properly advertized in the
1480 * declaration header. Report the error and switch the encoding
1481 * to ISO-Latin-1 (if you don't like this policy, just declare the
1482 * encoding !)
1483 */
1484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1485 ctxt->sax->error(ctxt->userData,
1486 "Input is not proper UTF-8, indicate encoding !\n");
1487 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1488 ctxt->input->cur[0], ctxt->input->cur[1],
1489 ctxt->input->cur[2], ctxt->input->cur[3]);
1490 }
1491 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1492
1493 *len = 1;
1494 return((int) *cur);
1495}
1496
1497/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001498 * xmlCopyCharMultiByte:
1499 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001500 * @val: the char value
1501 *
1502 * append the char value in the array
1503 *
1504 * Returns the number of xmlChar written
1505 */
Owen Taylor3473f882001-02-23 17:55:21 +00001506int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001508 /*
1509 * We are supposed to handle UTF8, check it's valid
1510 * From rfc2044: encoding of the Unicode values on UTF-8:
1511 *
1512 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1513 * 0000 0000-0000 007F 0xxxxxxx
1514 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1515 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1516 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001517 if (val >= 0x80) {
1518 xmlChar *savedout = out;
1519 int bits;
1520 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1521 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1522 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1523 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001524 xmlGenericError(xmlGenericErrorContext,
1525 "Internal error, xmlCopyChar 0x%X out of bound\n",
1526 val);
1527 return(0);
1528 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001529 for ( ; bits >= 0; bits-= 6)
1530 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1531 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001532 }
1533 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001534 return 1;
1535}
1536
1537/**
1538 * xmlCopyChar:
1539 * @len: Ignored, compatibility
1540 * @out: pointer to an arry of xmlChar
1541 * @val: the char value
1542 *
1543 * append the char value in the array
1544 *
1545 * Returns the number of xmlChar written
1546 */
1547
1548int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001549xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001550 /* the len parameter is ignored */
1551 if (val >= 0x80) {
1552 return(xmlCopyCharMultiByte (out, val));
1553 }
1554 *out = (xmlChar) val;
1555 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001556}
1557
1558/************************************************************************
1559 * *
1560 * Commodity functions to switch encodings *
1561 * *
1562 ************************************************************************/
1563
1564/**
1565 * xmlSwitchEncoding:
1566 * @ctxt: the parser context
1567 * @enc: the encoding value (number)
1568 *
1569 * change the input functions when discovering the character encoding
1570 * of a given entity.
1571 *
1572 * Returns 0 in case of success, -1 otherwise
1573 */
1574int
1575xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1576{
1577 xmlCharEncodingHandlerPtr handler;
1578
1579 switch (enc) {
1580 case XML_CHAR_ENCODING_ERROR:
1581 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1583 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1584 ctxt->wellFormed = 0;
1585 ctxt->disableSAX = 1;
1586 break;
1587 case XML_CHAR_ENCODING_NONE:
1588 /* let's assume it's UTF-8 without the XML decl */
1589 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1590 return(0);
1591 case XML_CHAR_ENCODING_UTF8:
1592 /* default encoding, no conversion should be needed */
1593 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001594
1595 /*
1596 * Errata on XML-1.0 June 20 2001
1597 * Specific handling of the Byte Order Mark for
1598 * UTF-8
1599 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001600 if ((ctxt->input != NULL) &&
1601 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001602 (ctxt->input->cur[1] == 0xBB) &&
1603 (ctxt->input->cur[2] == 0xBF)) {
1604 ctxt->input->cur += 3;
1605 }
Owen Taylor3473f882001-02-23 17:55:21 +00001606 return(0);
1607 default:
1608 break;
1609 }
1610 handler = xmlGetCharEncodingHandler(enc);
1611 if (handler == NULL) {
1612 /*
1613 * Default handlers.
1614 */
1615 switch (enc) {
1616 case XML_CHAR_ENCODING_ERROR:
1617 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1619 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1620 ctxt->wellFormed = 0;
1621 ctxt->disableSAX = 1;
1622 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1623 break;
1624 case XML_CHAR_ENCODING_NONE:
1625 /* let's assume it's UTF-8 without the XML decl */
1626 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1627 return(0);
1628 case XML_CHAR_ENCODING_UTF8:
1629 case XML_CHAR_ENCODING_ASCII:
1630 /* default encoding, no conversion should be needed */
1631 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1632 return(0);
1633 case XML_CHAR_ENCODING_UTF16LE:
1634 break;
1635 case XML_CHAR_ENCODING_UTF16BE:
1636 break;
1637 case XML_CHAR_ENCODING_UCS4LE:
1638 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1640 ctxt->sax->error(ctxt->userData,
1641 "char encoding USC4 little endian not supported\n");
1642 break;
1643 case XML_CHAR_ENCODING_UCS4BE:
1644 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1646 ctxt->sax->error(ctxt->userData,
1647 "char encoding USC4 big endian not supported\n");
1648 break;
1649 case XML_CHAR_ENCODING_EBCDIC:
1650 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1652 ctxt->sax->error(ctxt->userData,
1653 "char encoding EBCDIC not supported\n");
1654 break;
1655 case XML_CHAR_ENCODING_UCS4_2143:
1656 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1658 ctxt->sax->error(ctxt->userData,
1659 "char encoding UCS4 2143 not supported\n");
1660 break;
1661 case XML_CHAR_ENCODING_UCS4_3412:
1662 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664 ctxt->sax->error(ctxt->userData,
1665 "char encoding UCS4 3412 not supported\n");
1666 break;
1667 case XML_CHAR_ENCODING_UCS2:
1668 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1670 ctxt->sax->error(ctxt->userData,
1671 "char encoding UCS2 not supported\n");
1672 break;
1673 case XML_CHAR_ENCODING_8859_1:
1674 case XML_CHAR_ENCODING_8859_2:
1675 case XML_CHAR_ENCODING_8859_3:
1676 case XML_CHAR_ENCODING_8859_4:
1677 case XML_CHAR_ENCODING_8859_5:
1678 case XML_CHAR_ENCODING_8859_6:
1679 case XML_CHAR_ENCODING_8859_7:
1680 case XML_CHAR_ENCODING_8859_8:
1681 case XML_CHAR_ENCODING_8859_9:
1682 /*
1683 * We used to keep the internal content in the
1684 * document encoding however this turns being unmaintainable
1685 * So xmlGetCharEncodingHandler() will return non-null
1686 * values for this now.
1687 */
1688 if ((ctxt->inputNr == 1) &&
1689 (ctxt->encoding == NULL) &&
1690 (ctxt->input->encoding != NULL)) {
1691 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1692 }
1693 ctxt->charset = enc;
1694 return(0);
1695 case XML_CHAR_ENCODING_2022_JP:
1696 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698 ctxt->sax->error(ctxt->userData,
1699 "char encoding ISO-2022-JPnot supported\n");
1700 break;
1701 case XML_CHAR_ENCODING_SHIFT_JIS:
1702 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1704 ctxt->sax->error(ctxt->userData,
1705 "char encoding Shift_JIS not supported\n");
1706 break;
1707 case XML_CHAR_ENCODING_EUC_JP:
1708 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
1711 "char encoding EUC-JPnot supported\n");
1712 break;
1713 }
1714 }
1715 if (handler == NULL)
1716 return(-1);
1717 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1718 return(xmlSwitchToEncoding(ctxt, handler));
1719}
1720
1721/**
1722 * xmlSwitchToEncoding:
1723 * @ctxt: the parser context
1724 * @handler: the encoding handler
1725 *
1726 * change the input functions when discovering the character encoding
1727 * of a given entity.
1728 *
1729 * Returns 0 in case of success, -1 otherwise
1730 */
1731int
1732xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1733{
1734 int nbchars;
1735
1736 if (handler != NULL) {
1737 if (ctxt->input != NULL) {
1738 if (ctxt->input->buf != NULL) {
1739 if (ctxt->input->buf->encoder != NULL) {
1740 if (ctxt->input->buf->encoder == handler)
1741 return(0);
1742 /*
1743 * Note: this is a bit dangerous, but that's what it
1744 * takes to use nearly compatible signature for different
1745 * encodings.
1746 */
1747 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1748 ctxt->input->buf->encoder = handler;
1749 return(0);
1750 }
1751 ctxt->input->buf->encoder = handler;
1752
1753 /*
1754 * Is there already some content down the pipe to convert ?
1755 */
1756 if ((ctxt->input->buf->buffer != NULL) &&
1757 (ctxt->input->buf->buffer->use > 0)) {
1758 int processed;
1759
1760 /*
1761 * Specific handling of the Byte Order Mark for
1762 * UTF-16
1763 */
1764 if ((handler->name != NULL) &&
1765 (!strcmp(handler->name, "UTF-16LE")) &&
1766 (ctxt->input->cur[0] == 0xFF) &&
1767 (ctxt->input->cur[1] == 0xFE)) {
1768 ctxt->input->cur += 2;
1769 }
1770 if ((handler->name != NULL) &&
1771 (!strcmp(handler->name, "UTF-16BE")) &&
1772 (ctxt->input->cur[0] == 0xFE) &&
1773 (ctxt->input->cur[1] == 0xFF)) {
1774 ctxt->input->cur += 2;
1775 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001776 /*
1777 * Errata on XML-1.0 June 20 2001
1778 * Specific handling of the Byte Order Mark for
1779 * UTF-8
1780 */
1781 if ((handler->name != NULL) &&
1782 (!strcmp(handler->name, "UTF-8")) &&
1783 (ctxt->input->cur[0] == 0xEF) &&
1784 (ctxt->input->cur[1] == 0xBB) &&
1785 (ctxt->input->cur[1] == 0xBF)) {
1786 ctxt->input->cur += 3;
1787 }
Owen Taylor3473f882001-02-23 17:55:21 +00001788
1789 /*
1790 * Shring the current input buffer.
1791 * Move it as the raw buffer and create a new input buffer
1792 */
1793 processed = ctxt->input->cur - ctxt->input->base;
1794 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1795 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1796 ctxt->input->buf->buffer = xmlBufferCreate();
1797
1798 if (ctxt->html) {
1799 /*
1800 * converst as much as possbile of the buffer
1801 */
1802 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1803 ctxt->input->buf->buffer,
1804 ctxt->input->buf->raw);
1805 } else {
1806 /*
1807 * convert just enough to get
1808 * '<?xml version="1.0" encoding="xxx"?>'
1809 * parsed with the autodetected encoding
1810 * into the parser reading buffer.
1811 */
1812 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1813 ctxt->input->buf->buffer,
1814 ctxt->input->buf->raw);
1815 }
1816 if (nbchars < 0) {
1817 xmlGenericError(xmlGenericErrorContext,
1818 "xmlSwitchToEncoding: encoder error\n");
1819 return(-1);
1820 }
1821 ctxt->input->base =
1822 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001823 ctxt->input->end =
1824 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001825
1826 }
1827 return(0);
1828 } else {
1829 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1830 /*
1831 * When parsing a static memory array one must know the
1832 * size to be able to convert the buffer.
1833 */
1834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1835 ctxt->sax->error(ctxt->userData,
1836 "xmlSwitchEncoding : no input\n");
1837 return(-1);
1838 } else {
1839 int processed;
1840
1841 /*
1842 * Shring the current input buffer.
1843 * Move it as the raw buffer and create a new input buffer
1844 */
1845 processed = ctxt->input->cur - ctxt->input->base;
1846
1847 ctxt->input->buf->raw = xmlBufferCreate();
1848 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1849 ctxt->input->length - processed);
1850 ctxt->input->buf->buffer = xmlBufferCreate();
1851
1852 /*
1853 * convert as much as possible of the raw input
1854 * to the parser reading buffer.
1855 */
1856 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1857 ctxt->input->buf->buffer,
1858 ctxt->input->buf->raw);
1859 if (nbchars < 0) {
1860 xmlGenericError(xmlGenericErrorContext,
1861 "xmlSwitchToEncoding: encoder error\n");
1862 return(-1);
1863 }
1864
1865 /*
1866 * Conversion succeeded, get rid of the old buffer
1867 */
1868 if ((ctxt->input->free != NULL) &&
1869 (ctxt->input->base != NULL))
1870 ctxt->input->free((xmlChar *) ctxt->input->base);
1871 ctxt->input->base =
1872 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001873 ctxt->input->end =
1874 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001875 }
1876 }
1877 } else {
1878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1879 ctxt->sax->error(ctxt->userData,
1880 "xmlSwitchEncoding : no input\n");
1881 return(-1);
1882 }
1883 /*
1884 * The parsing is now done in UTF8 natively
1885 */
1886 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1887 } else
1888 return(-1);
1889 return(0);
1890
1891}
1892
1893/************************************************************************
1894 * *
1895 * Commodity functions to handle entities processing *
1896 * *
1897 ************************************************************************/
1898
1899/**
1900 * xmlFreeInputStream:
1901 * @input: an xmlParserInputPtr
1902 *
1903 * Free up an input stream.
1904 */
1905void
1906xmlFreeInputStream(xmlParserInputPtr input) {
1907 if (input == NULL) return;
1908
1909 if (input->filename != NULL) xmlFree((char *) input->filename);
1910 if (input->directory != NULL) xmlFree((char *) input->directory);
1911 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1912 if (input->version != NULL) xmlFree((char *) input->version);
1913 if ((input->free != NULL) && (input->base != NULL))
1914 input->free((xmlChar *) input->base);
1915 if (input->buf != NULL)
1916 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001917 xmlFree(input);
1918}
1919
1920/**
1921 * xmlNewInputStream:
1922 * @ctxt: an XML parser context
1923 *
1924 * Create a new input stream structure
1925 * Returns the new input stream or NULL
1926 */
1927xmlParserInputPtr
1928xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1929 xmlParserInputPtr input;
1930
1931 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1932 if (input == NULL) {
1933 if (ctxt != NULL) {
1934 ctxt->errNo = XML_ERR_NO_MEMORY;
1935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1936 ctxt->sax->error(ctxt->userData,
1937 "malloc: couldn't allocate a new input stream\n");
1938 ctxt->errNo = XML_ERR_NO_MEMORY;
1939 }
1940 return(NULL);
1941 }
1942 memset(input, 0, sizeof(xmlParserInput));
1943 input->line = 1;
1944 input->col = 1;
1945 input->standalone = -1;
1946 return(input);
1947}
1948
1949/**
1950 * xmlNewIOInputStream:
1951 * @ctxt: an XML parser context
1952 * @input: an I/O Input
1953 * @enc: the charset encoding if known
1954 *
1955 * Create a new input stream structure encapsulating the @input into
1956 * a stream suitable for the parser.
1957 *
1958 * Returns the new input stream or NULL
1959 */
1960xmlParserInputPtr
1961xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1962 xmlCharEncoding enc) {
1963 xmlParserInputPtr inputStream;
1964
1965 if (xmlParserDebugEntities)
1966 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1967 inputStream = xmlNewInputStream(ctxt);
1968 if (inputStream == NULL) {
1969 return(NULL);
1970 }
1971 inputStream->filename = NULL;
1972 inputStream->buf = input;
1973 inputStream->base = inputStream->buf->buffer->content;
1974 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001975 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001976 if (enc != XML_CHAR_ENCODING_NONE) {
1977 xmlSwitchEncoding(ctxt, enc);
1978 }
1979
1980 return(inputStream);
1981}
1982
1983/**
1984 * xmlNewEntityInputStream:
1985 * @ctxt: an XML parser context
1986 * @entity: an Entity pointer
1987 *
1988 * Create a new input stream based on an xmlEntityPtr
1989 *
1990 * Returns the new input stream or NULL
1991 */
1992xmlParserInputPtr
1993xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1994 xmlParserInputPtr input;
1995
1996 if (entity == NULL) {
1997 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1999 ctxt->sax->error(ctxt->userData,
2000 "internal: xmlNewEntityInputStream entity = NULL\n");
2001 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2002 return(NULL);
2003 }
2004 if (xmlParserDebugEntities)
2005 xmlGenericError(xmlGenericErrorContext,
2006 "new input from entity: %s\n", entity->name);
2007 if (entity->content == NULL) {
2008 switch (entity->etype) {
2009 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2010 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2012 ctxt->sax->error(ctxt->userData,
2013 "xmlNewEntityInputStream unparsed entity !\n");
2014 break;
2015 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2016 case XML_EXTERNAL_PARAMETER_ENTITY:
2017 return(xmlLoadExternalEntity((char *) entity->URI,
2018 (char *) entity->ExternalID, ctxt));
2019 case XML_INTERNAL_GENERAL_ENTITY:
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData,
2022 "Internal entity %s without content !\n", entity->name);
2023 break;
2024 case XML_INTERNAL_PARAMETER_ENTITY:
2025 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2027 ctxt->sax->error(ctxt->userData,
2028 "Internal parameter entity %s without content !\n", entity->name);
2029 break;
2030 case XML_INTERNAL_PREDEFINED_ENTITY:
2031 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2033 ctxt->sax->error(ctxt->userData,
2034 "Predefined entity %s without content !\n", entity->name);
2035 break;
2036 }
2037 return(NULL);
2038 }
2039 input = xmlNewInputStream(ctxt);
2040 if (input == NULL) {
2041 return(NULL);
2042 }
2043 input->filename = (char *) entity->URI;
2044 input->base = entity->content;
2045 input->cur = entity->content;
2046 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002047 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002048 return(input);
2049}
2050
2051/**
2052 * xmlNewStringInputStream:
2053 * @ctxt: an XML parser context
2054 * @buffer: an memory buffer
2055 *
2056 * Create a new input stream based on a memory buffer.
2057 * Returns the new input stream
2058 */
2059xmlParserInputPtr
2060xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2061 xmlParserInputPtr input;
2062
2063 if (buffer == NULL) {
2064 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2066 ctxt->sax->error(ctxt->userData,
2067 "internal: xmlNewStringInputStream string = NULL\n");
2068 return(NULL);
2069 }
2070 if (xmlParserDebugEntities)
2071 xmlGenericError(xmlGenericErrorContext,
2072 "new fixed input: %.30s\n", buffer);
2073 input = xmlNewInputStream(ctxt);
2074 if (input == NULL) {
2075 return(NULL);
2076 }
2077 input->base = buffer;
2078 input->cur = buffer;
2079 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002080 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002081 return(input);
2082}
2083
2084/**
2085 * xmlNewInputFromFile:
2086 * @ctxt: an XML parser context
2087 * @filename: the filename to use as entity
2088 *
2089 * Create a new input stream based on a file.
2090 *
2091 * Returns the new input stream or NULL in case of error
2092 */
2093xmlParserInputPtr
2094xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2095 xmlParserInputBufferPtr buf;
2096 xmlParserInputPtr inputStream;
2097 char *directory = NULL;
2098 xmlChar *URI = NULL;
2099
2100 if (xmlParserDebugEntities)
2101 xmlGenericError(xmlGenericErrorContext,
2102 "new input from file: %s\n", filename);
2103 if (ctxt == NULL) return(NULL);
2104 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2105 if (buf == NULL)
2106 return(NULL);
2107
2108 URI = xmlStrdup((xmlChar *) filename);
2109 directory = xmlParserGetDirectory((const char *) URI);
2110
2111 inputStream = xmlNewInputStream(ctxt);
2112 if (inputStream == NULL) {
2113 if (directory != NULL) xmlFree((char *) directory);
2114 if (URI != NULL) xmlFree((char *) URI);
2115 return(NULL);
2116 }
2117
2118 inputStream->filename = (const char *) URI;
2119 inputStream->directory = directory;
2120 inputStream->buf = buf;
2121
2122 inputStream->base = inputStream->buf->buffer->content;
2123 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002124 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002125 if ((ctxt->directory == NULL) && (directory != NULL))
2126 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2127 return(inputStream);
2128}
2129
2130/************************************************************************
2131 * *
2132 * Commodity functions to handle parser contexts *
2133 * *
2134 ************************************************************************/
2135
2136/**
2137 * xmlInitParserCtxt:
2138 * @ctxt: an XML parser context
2139 *
2140 * Initialize a parser context
2141 */
2142
2143void
2144xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2145{
2146 xmlSAXHandler *sax;
2147
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002148 if(ctxt==NULL) {
2149 xmlGenericError(xmlGenericErrorContext,
2150 "xmlInitParserCtxt: NULL context given\n");
2151 return;
2152 }
2153
Owen Taylor3473f882001-02-23 17:55:21 +00002154 xmlDefaultSAXHandlerInit();
2155
2156 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2157 if (sax == NULL) {
2158 xmlGenericError(xmlGenericErrorContext,
2159 "xmlInitParserCtxt: out of memory\n");
2160 }
2161 else
2162 memset(sax, 0, sizeof(xmlSAXHandler));
2163
2164 /* Allocate the Input stack */
2165 ctxt->inputTab = (xmlParserInputPtr *)
2166 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2167 if (ctxt->inputTab == NULL) {
2168 xmlGenericError(xmlGenericErrorContext,
2169 "xmlInitParserCtxt: out of memory\n");
2170 ctxt->inputNr = 0;
2171 ctxt->inputMax = 0;
2172 ctxt->input = NULL;
2173 return;
2174 }
2175 ctxt->inputNr = 0;
2176 ctxt->inputMax = 5;
2177 ctxt->input = NULL;
2178
2179 ctxt->version = NULL;
2180 ctxt->encoding = NULL;
2181 ctxt->standalone = -1;
2182 ctxt->hasExternalSubset = 0;
2183 ctxt->hasPErefs = 0;
2184 ctxt->html = 0;
2185 ctxt->external = 0;
2186 ctxt->instate = XML_PARSER_START;
2187 ctxt->token = 0;
2188 ctxt->directory = NULL;
2189
2190 /* Allocate the Node stack */
2191 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2192 if (ctxt->nodeTab == NULL) {
2193 xmlGenericError(xmlGenericErrorContext,
2194 "xmlInitParserCtxt: out of memory\n");
2195 ctxt->nodeNr = 0;
2196 ctxt->nodeMax = 0;
2197 ctxt->node = NULL;
2198 ctxt->inputNr = 0;
2199 ctxt->inputMax = 0;
2200 ctxt->input = NULL;
2201 return;
2202 }
2203 ctxt->nodeNr = 0;
2204 ctxt->nodeMax = 10;
2205 ctxt->node = NULL;
2206
2207 /* Allocate the Name stack */
2208 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2209 if (ctxt->nameTab == NULL) {
2210 xmlGenericError(xmlGenericErrorContext,
2211 "xmlInitParserCtxt: out of memory\n");
2212 ctxt->nodeNr = 0;
2213 ctxt->nodeMax = 0;
2214 ctxt->node = NULL;
2215 ctxt->inputNr = 0;
2216 ctxt->inputMax = 0;
2217 ctxt->input = NULL;
2218 ctxt->nameNr = 0;
2219 ctxt->nameMax = 0;
2220 ctxt->name = NULL;
2221 return;
2222 }
2223 ctxt->nameNr = 0;
2224 ctxt->nameMax = 10;
2225 ctxt->name = NULL;
2226
2227 /* Allocate the space stack */
2228 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2229 if (ctxt->spaceTab == NULL) {
2230 xmlGenericError(xmlGenericErrorContext,
2231 "xmlInitParserCtxt: out of memory\n");
2232 ctxt->nodeNr = 0;
2233 ctxt->nodeMax = 0;
2234 ctxt->node = NULL;
2235 ctxt->inputNr = 0;
2236 ctxt->inputMax = 0;
2237 ctxt->input = NULL;
2238 ctxt->nameNr = 0;
2239 ctxt->nameMax = 0;
2240 ctxt->name = NULL;
2241 ctxt->spaceNr = 0;
2242 ctxt->spaceMax = 0;
2243 ctxt->space = NULL;
2244 return;
2245 }
2246 ctxt->spaceNr = 1;
2247 ctxt->spaceMax = 10;
2248 ctxt->spaceTab[0] = -1;
2249 ctxt->space = &ctxt->spaceTab[0];
2250
Daniel Veillard14be0a12001-03-03 18:50:55 +00002251 ctxt->sax = sax;
2252 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2253
Owen Taylor3473f882001-02-23 17:55:21 +00002254 ctxt->userData = ctxt;
2255 ctxt->myDoc = NULL;
2256 ctxt->wellFormed = 1;
2257 ctxt->valid = 1;
2258 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2259 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2260 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002261 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002262 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002263 if (ctxt->keepBlanks == 0)
2264 sax->ignorableWhitespace = ignorableWhitespace;
2265
Owen Taylor3473f882001-02-23 17:55:21 +00002266 ctxt->vctxt.userData = ctxt;
2267 if (ctxt->validate) {
2268 ctxt->vctxt.error = xmlParserValidityError;
2269 if (xmlGetWarningsDefaultValue == 0)
2270 ctxt->vctxt.warning = NULL;
2271 else
2272 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002273 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002274 } else {
2275 ctxt->vctxt.error = NULL;
2276 ctxt->vctxt.warning = NULL;
2277 }
2278 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2279 ctxt->record_info = 0;
2280 ctxt->nbChars = 0;
2281 ctxt->checkIndex = 0;
2282 ctxt->inSubset = 0;
2283 ctxt->errNo = XML_ERR_OK;
2284 ctxt->depth = 0;
2285 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002286 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002287 xmlInitNodeInfoSeq(&ctxt->node_seq);
2288}
2289
2290/**
2291 * xmlFreeParserCtxt:
2292 * @ctxt: an XML parser context
2293 *
2294 * Free all the memory used by a parser context. However the parsed
2295 * document in ctxt->myDoc is not freed.
2296 */
2297
2298void
2299xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2300{
2301 xmlParserInputPtr input;
2302 xmlChar *oldname;
2303
2304 if (ctxt == NULL) return;
2305
2306 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2307 xmlFreeInputStream(input);
2308 }
2309 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2310 xmlFree(oldname);
2311 }
2312 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2313 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2314 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2315 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2316 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2317 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2318 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2319 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2320 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002321 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2322 xmlFree(ctxt->sax);
2323 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002324 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002325#ifdef LIBXML_CATALOG_ENABLED
2326 if (ctxt->catalogs != NULL)
2327 xmlCatalogFreeLocal(ctxt->catalogs);
2328#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002329 xmlFree(ctxt);
2330}
2331
2332/**
2333 * xmlNewParserCtxt:
2334 *
2335 * Allocate and initialize a new parser context.
2336 *
2337 * Returns the xmlParserCtxtPtr or NULL
2338 */
2339
2340xmlParserCtxtPtr
2341xmlNewParserCtxt()
2342{
2343 xmlParserCtxtPtr ctxt;
2344
2345 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2346 if (ctxt == NULL) {
2347 xmlGenericError(xmlGenericErrorContext,
2348 "xmlNewParserCtxt : cannot allocate context\n");
2349 perror("malloc");
2350 return(NULL);
2351 }
2352 memset(ctxt, 0, sizeof(xmlParserCtxt));
2353 xmlInitParserCtxt(ctxt);
2354 return(ctxt);
2355}
2356
2357/************************************************************************
2358 * *
2359 * Handling of node informations *
2360 * *
2361 ************************************************************************/
2362
2363/**
2364 * xmlClearParserCtxt:
2365 * @ctxt: an XML parser context
2366 *
2367 * Clear (release owned resources) and reinitialize a parser context
2368 */
2369
2370void
2371xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2372{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002373 if (ctxt==NULL)
2374 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002375 xmlClearNodeInfoSeq(&ctxt->node_seq);
2376 xmlInitParserCtxt(ctxt);
2377}
2378
2379/**
2380 * xmlParserFindNodeInfo:
2381 * @ctxt: an XML parser context
2382 * @node: an XML node within the tree
2383 *
2384 * Find the parser node info struct for a given node
2385 *
2386 * Returns an xmlParserNodeInfo block pointer or NULL
2387 */
2388const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2389 const xmlNode* node)
2390{
2391 unsigned long pos;
2392
2393 /* Find position where node should be at */
2394 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002395 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002396 return &ctx->node_seq.buffer[pos];
2397 else
2398 return NULL;
2399}
2400
2401
2402/**
2403 * xmlInitNodeInfoSeq:
2404 * @seq: a node info sequence pointer
2405 *
2406 * -- Initialize (set to initial state) node info sequence
2407 */
2408void
2409xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2410{
2411 seq->length = 0;
2412 seq->maximum = 0;
2413 seq->buffer = NULL;
2414}
2415
2416/**
2417 * xmlClearNodeInfoSeq:
2418 * @seq: a node info sequence pointer
2419 *
2420 * -- Clear (release memory and reinitialize) node
2421 * info sequence
2422 */
2423void
2424xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2425{
2426 if ( seq->buffer != NULL )
2427 xmlFree(seq->buffer);
2428 xmlInitNodeInfoSeq(seq);
2429}
2430
2431
2432/**
2433 * xmlParserFindNodeInfoIndex:
2434 * @seq: a node info sequence pointer
2435 * @node: an XML node pointer
2436 *
2437 *
2438 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2439 * the given node is or should be at in a sorted sequence
2440 *
2441 * Returns a long indicating the position of the record
2442 */
2443unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2444 const xmlNode* node)
2445{
2446 unsigned long upper, lower, middle;
2447 int found = 0;
2448
2449 /* Do a binary search for the key */
2450 lower = 1;
2451 upper = seq->length;
2452 middle = 0;
2453 while ( lower <= upper && !found) {
2454 middle = lower + (upper - lower) / 2;
2455 if ( node == seq->buffer[middle - 1].node )
2456 found = 1;
2457 else if ( node < seq->buffer[middle - 1].node )
2458 upper = middle - 1;
2459 else
2460 lower = middle + 1;
2461 }
2462
2463 /* Return position */
2464 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2465 return middle;
2466 else
2467 return middle - 1;
2468}
2469
2470
2471/**
2472 * xmlParserAddNodeInfo:
2473 * @ctxt: an XML parser context
2474 * @info: a node info sequence pointer
2475 *
2476 * Insert node info record into the sorted sequence
2477 */
2478void
2479xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2480 const xmlParserNodeInfo* info)
2481{
2482 unsigned long pos;
2483 static unsigned int block_size = 5;
2484
2485 /* Find pos and check to see if node is already in the sequence */
2486 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2487 if ( pos < ctxt->node_seq.length
2488 && ctxt->node_seq.buffer[pos].node == info->node ) {
2489 ctxt->node_seq.buffer[pos] = *info;
2490 }
2491
2492 /* Otherwise, we need to add new node to buffer */
2493 else {
2494 /* Expand buffer by 5 if needed */
2495 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2496 xmlParserNodeInfo* tmp_buffer;
2497 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2498 *(ctxt->node_seq.maximum + block_size));
2499
2500 if ( ctxt->node_seq.buffer == NULL )
2501 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2502 else
2503 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2504
2505 if ( tmp_buffer == NULL ) {
2506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2507 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2508 ctxt->errNo = XML_ERR_NO_MEMORY;
2509 return;
2510 }
2511 ctxt->node_seq.buffer = tmp_buffer;
2512 ctxt->node_seq.maximum += block_size;
2513 }
2514
2515 /* If position is not at end, move elements out of the way */
2516 if ( pos != ctxt->node_seq.length ) {
2517 unsigned long i;
2518
2519 for ( i = ctxt->node_seq.length; i > pos; i-- )
2520 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2521 }
2522
2523 /* Copy element and increase length */
2524 ctxt->node_seq.buffer[pos] = *info;
2525 ctxt->node_seq.length++;
2526 }
2527}
2528
2529/************************************************************************
2530 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002531 * Defaults settings *
2532 * *
2533 ************************************************************************/
2534/**
2535 * xmlPedanticParserDefault:
2536 * @val: int 0 or 1
2537 *
2538 * Set and return the previous value for enabling pedantic warnings.
2539 *
2540 * Returns the last value for 0 for no substitution, 1 for substitution.
2541 */
2542
2543int
2544xmlPedanticParserDefault(int val) {
2545 int old = xmlPedanticParserDefaultValue;
2546
2547 xmlPedanticParserDefaultValue = val;
2548 return(old);
2549}
2550
2551/**
2552 * xmlLineNumbersDefault:
2553 * @val: int 0 or 1
2554 *
2555 * Set and return the previous value for enabling line numbers in elements
2556 * contents. This may break on old application and is turned off by default.
2557 *
2558 * Returns the last value for 0 for no substitution, 1 for substitution.
2559 */
2560
2561int
2562xmlLineNumbersDefault(int val) {
2563 int old = xmlLineNumbersDefaultValue;
2564
2565 xmlLineNumbersDefaultValue = val;
2566 return(old);
2567}
2568
2569/**
2570 * xmlSubstituteEntitiesDefault:
2571 * @val: int 0 or 1
2572 *
2573 * Set and return the previous value for default entity support.
2574 * Initially the parser always keep entity references instead of substituting
2575 * entity values in the output. This function has to be used to change the
2576 * default parser behaviour
2577 * SAX::subtituteEntities() has to be used for changing that on a file by
2578 * file basis.
2579 *
2580 * Returns the last value for 0 for no substitution, 1 for substitution.
2581 */
2582
2583int
2584xmlSubstituteEntitiesDefault(int val) {
2585 int old = xmlSubstituteEntitiesDefaultValue;
2586
2587 xmlSubstituteEntitiesDefaultValue = val;
2588 return(old);
2589}
2590
2591/**
2592 * xmlKeepBlanksDefault:
2593 * @val: int 0 or 1
2594 *
2595 * Set and return the previous value for default blanks text nodes support.
2596 * The 1.x version of the parser used an heuristic to try to detect
2597 * ignorable white spaces. As a result the SAX callback was generating
2598 * ignorableWhitespace() callbacks instead of characters() one, and when
2599 * using the DOM output text nodes containing those blanks were not generated.
2600 * The 2.x and later version will switch to the XML standard way and
2601 * ignorableWhitespace() are only generated when running the parser in
2602 * validating mode and when the current element doesn't allow CDATA or
2603 * mixed content.
2604 * This function is provided as a way to force the standard behaviour
2605 * on 1.X libs and to switch back to the old mode for compatibility when
2606 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2607 * by using xmlIsBlankNode() commodity function to detect the "empty"
2608 * nodes generated.
2609 * This value also affect autogeneration of indentation when saving code
2610 * if blanks sections are kept, indentation is not generated.
2611 *
2612 * Returns the last value for 0 for no substitution, 1 for substitution.
2613 */
2614
2615int
2616xmlKeepBlanksDefault(int val) {
2617 int old = xmlKeepBlanksDefaultValue;
2618
2619 xmlKeepBlanksDefaultValue = val;
2620 xmlIndentTreeOutput = !val;
2621 return(old);
2622}
2623
2624/************************************************************************
2625 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002626 * Deprecated functions kept for compatibility *
2627 * *
2628 ************************************************************************/
2629
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002630/**
2631 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002632 * @lang: pointer to the string value
2633 *
2634 * Checks that the value conforms to the LanguageID production:
2635 *
2636 * NOTE: this is somewhat deprecated, those productions were removed from
2637 * the XML Second edition.
2638 *
2639 * [33] LanguageID ::= Langcode ('-' Subcode)*
2640 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2641 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2642 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2643 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2644 * [38] Subcode ::= ([a-z] | [A-Z])+
2645 *
2646 * Returns 1 if correct 0 otherwise
2647 **/
2648int
2649xmlCheckLanguageID(const xmlChar *lang) {
2650 const xmlChar *cur = lang;
2651
2652 if (cur == NULL)
2653 return(0);
2654 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2655 ((cur[0] == 'I') && (cur[1] == '-'))) {
2656 /*
2657 * IANA code
2658 */
2659 cur += 2;
2660 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2661 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2662 cur++;
2663 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2664 ((cur[0] == 'X') && (cur[1] == '-'))) {
2665 /*
2666 * User code
2667 */
2668 cur += 2;
2669 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2670 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2671 cur++;
2672 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2673 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2674 /*
2675 * ISO639
2676 */
2677 cur++;
2678 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2679 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2680 cur++;
2681 else
2682 return(0);
2683 } else
2684 return(0);
2685 while (cur[0] != 0) { /* non input consuming */
2686 if (cur[0] != '-')
2687 return(0);
2688 cur++;
2689 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2690 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2691 cur++;
2692 else
2693 return(0);
2694 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2695 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2696 cur++;
2697 }
2698 return(1);
2699}
2700
2701/**
2702 * xmlDecodeEntities:
2703 * @ctxt: the parser context
2704 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2705 * @len: the len to decode (in bytes !), -1 for no size limit
2706 * @end: an end marker xmlChar, 0 if none
2707 * @end2: an end marker xmlChar, 0 if none
2708 * @end3: an end marker xmlChar, 0 if none
2709 *
2710 * This function is deprecated, we now always process entities content
2711 * through xmlStringDecodeEntities
2712 *
2713 * TODO: remove it in next major release.
2714 *
2715 * [67] Reference ::= EntityRef | CharRef
2716 *
2717 * [69] PEReference ::= '%' Name ';'
2718 *
2719 * Returns A newly allocated string with the substitution done. The caller
2720 * must deallocate it !
2721 */
2722xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002723xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2724 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002725#if 0
2726 xmlChar *buffer = NULL;
2727 unsigned int buffer_size = 0;
2728 unsigned int nbchars = 0;
2729
2730 xmlChar *current = NULL;
2731 xmlEntityPtr ent;
2732 unsigned int max = (unsigned int) len;
2733 int c,l;
2734#endif
2735
2736 static int deprecated = 0;
2737 if (!deprecated) {
2738 xmlGenericError(xmlGenericErrorContext,
2739 "xmlDecodeEntities() deprecated function reached\n");
2740 deprecated = 1;
2741 }
2742
2743#if 0
2744 if (ctxt->depth > 40) {
2745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2746 ctxt->sax->error(ctxt->userData,
2747 "Detected entity reference loop\n");
2748 ctxt->wellFormed = 0;
2749 ctxt->disableSAX = 1;
2750 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2751 return(NULL);
2752 }
2753
2754 /*
2755 * allocate a translation buffer.
2756 */
2757 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2758 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2759 if (buffer == NULL) {
2760 perror("xmlDecodeEntities: malloc failed");
2761 return(NULL);
2762 }
2763
2764 /*
2765 * Ok loop until we reach one of the ending char or a size limit.
2766 */
2767 GROW;
2768 c = CUR_CHAR(l);
2769 while ((nbchars < max) && (c != end) && /* NOTUSED */
2770 (c != end2) && (c != end3)) {
2771 GROW;
2772 if (c == 0) break;
2773 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2774 int val = xmlParseCharRef(ctxt);
2775 COPY_BUF(0,buffer,nbchars,val);
2776 NEXTL(l);
2777 } else if ((c == '&') && (ctxt->token != '&') &&
2778 (what & XML_SUBSTITUTE_REF)) {
2779 if (xmlParserDebugEntities)
2780 xmlGenericError(xmlGenericErrorContext,
2781 "decoding Entity Reference\n");
2782 ent = xmlParseEntityRef(ctxt);
2783 if ((ent != NULL) &&
2784 (ctxt->replaceEntities != 0)) {
2785 current = ent->content;
2786 while (*current != 0) { /* non input consuming loop */
2787 buffer[nbchars++] = *current++;
2788 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2789 growBuffer(buffer);
2790 }
2791 }
2792 } else if (ent != NULL) {
2793 const xmlChar *cur = ent->name;
2794
2795 buffer[nbchars++] = '&';
2796 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2797 growBuffer(buffer);
2798 }
2799 while (*cur != 0) { /* non input consuming loop */
2800 buffer[nbchars++] = *cur++;
2801 }
2802 buffer[nbchars++] = ';';
2803 }
2804 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2805 /*
2806 * a PEReference induce to switch the entity flow,
2807 * we break here to flush the current set of chars
2808 * parsed if any. We will be called back later.
2809 */
2810 if (xmlParserDebugEntities)
2811 xmlGenericError(xmlGenericErrorContext,
2812 "decoding PE Reference\n");
2813 if (nbchars != 0) break;
2814
2815 xmlParsePEReference(ctxt);
2816
2817 /*
2818 * Pop-up of finished entities.
2819 */
2820 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2821 xmlPopInput(ctxt);
2822
2823 break;
2824 } else {
2825 COPY_BUF(l,buffer,nbchars,c);
2826 NEXTL(l);
2827 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2828 growBuffer(buffer);
2829 }
2830 }
2831 c = CUR_CHAR(l);
2832 }
2833 buffer[nbchars++] = 0;
2834 return(buffer);
2835#endif
2836 return(NULL);
2837}
2838
2839/**
2840 * xmlNamespaceParseNCName:
2841 * @ctxt: an XML parser context
2842 *
2843 * parse an XML namespace name.
2844 *
2845 * TODO: this seems not in use anymore, the namespace handling is done on
2846 * top of the SAX interfaces, i.e. not on raw input.
2847 *
2848 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2849 *
2850 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2851 * CombiningChar | Extender
2852 *
2853 * Returns the namespace name or NULL
2854 */
2855
2856xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002857xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002858#if 0
2859 xmlChar buf[XML_MAX_NAMELEN + 5];
2860 int len = 0, l;
2861 int cur = CUR_CHAR(l);
2862#endif
2863
2864 static int deprecated = 0;
2865 if (!deprecated) {
2866 xmlGenericError(xmlGenericErrorContext,
2867 "xmlNamespaceParseNCName() deprecated function reached\n");
2868 deprecated = 1;
2869 }
2870
2871#if 0
2872 /* load first the value of the char !!! */
2873 GROW;
2874 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2875
2876xmlGenericError(xmlGenericErrorContext,
2877 "xmlNamespaceParseNCName: reached loop 3\n");
2878 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2879 (cur == '.') || (cur == '-') ||
2880 (cur == '_') ||
2881 (IS_COMBINING(cur)) ||
2882 (IS_EXTENDER(cur))) {
2883 COPY_BUF(l,buf,len,cur);
2884 NEXTL(l);
2885 cur = CUR_CHAR(l);
2886 if (len >= XML_MAX_NAMELEN) {
2887 xmlGenericError(xmlGenericErrorContext,
2888 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2889 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2890 (cur == '.') || (cur == '-') ||
2891 (cur == '_') ||
2892 (IS_COMBINING(cur)) ||
2893 (IS_EXTENDER(cur))) {
2894 NEXTL(l);
2895 cur = CUR_CHAR(l);
2896 }
2897 break;
2898 }
2899 }
2900 return(xmlStrndup(buf, len));
2901#endif
2902 return(NULL);
2903}
2904
2905/**
2906 * xmlNamespaceParseQName:
2907 * @ctxt: an XML parser context
2908 * @prefix: a xmlChar **
2909 *
2910 * TODO: this seems not in use anymore, the namespace handling is done on
2911 * top of the SAX interfaces, i.e. not on raw input.
2912 *
2913 * parse an XML qualified name
2914 *
2915 * [NS 5] QName ::= (Prefix ':')? LocalPart
2916 *
2917 * [NS 6] Prefix ::= NCName
2918 *
2919 * [NS 7] LocalPart ::= NCName
2920 *
2921 * Returns the local part, and prefix is updated
2922 * to get the Prefix if any.
2923 */
2924
2925xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002926xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002927
2928 static int deprecated = 0;
2929 if (!deprecated) {
2930 xmlGenericError(xmlGenericErrorContext,
2931 "xmlNamespaceParseQName() deprecated function reached\n");
2932 deprecated = 1;
2933 }
2934
2935#if 0
2936 xmlChar *ret = NULL;
2937
2938 *prefix = NULL;
2939 ret = xmlNamespaceParseNCName(ctxt);
2940 if (RAW == ':') {
2941 *prefix = ret;
2942 NEXT;
2943 ret = xmlNamespaceParseNCName(ctxt);
2944 }
2945
2946 return(ret);
2947#endif
2948 return(NULL);
2949}
2950
2951/**
2952 * xmlNamespaceParseNSDef:
2953 * @ctxt: an XML parser context
2954 *
2955 * parse a namespace prefix declaration
2956 *
2957 * TODO: this seems not in use anymore, the namespace handling is done on
2958 * top of the SAX interfaces, i.e. not on raw input.
2959 *
2960 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2961 *
2962 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2963 *
2964 * Returns the namespace name
2965 */
2966
2967xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002968xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002969 static int deprecated = 0;
2970 if (!deprecated) {
2971 xmlGenericError(xmlGenericErrorContext,
2972 "xmlNamespaceParseNSDef() deprecated function reached\n");
2973 deprecated = 1;
2974 }
2975 return(NULL);
2976#if 0
2977 xmlChar *name = NULL;
2978
2979 if ((RAW == 'x') && (NXT(1) == 'm') &&
2980 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2981 (NXT(4) == 's')) {
2982 SKIP(5);
2983 if (RAW == ':') {
2984 NEXT;
2985 name = xmlNamespaceParseNCName(ctxt);
2986 }
2987 }
2988 return(name);
2989#endif
2990}
2991
2992/**
2993 * xmlParseQuotedString:
2994 * @ctxt: an XML parser context
2995 *
2996 * Parse and return a string between quotes or doublequotes
2997 *
2998 * TODO: Deprecated, to be removed at next drop of binary compatibility
2999 *
3000 * Returns the string parser or NULL.
3001 */
3002xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003003xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003004 static int deprecated = 0;
3005 if (!deprecated) {
3006 xmlGenericError(xmlGenericErrorContext,
3007 "xmlParseQuotedString() deprecated function reached\n");
3008 deprecated = 1;
3009 }
3010 return(NULL);
3011
3012#if 0
3013 xmlChar *buf = NULL;
3014 int len = 0,l;
3015 int size = XML_PARSER_BUFFER_SIZE;
3016 int c;
3017
3018 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3019 if (buf == NULL) {
3020 xmlGenericError(xmlGenericErrorContext,
3021 "malloc of %d byte failed\n", size);
3022 return(NULL);
3023 }
3024xmlGenericError(xmlGenericErrorContext,
3025 "xmlParseQuotedString: reached loop 4\n");
3026 if (RAW == '"') {
3027 NEXT;
3028 c = CUR_CHAR(l);
3029 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3030 if (len + 5 >= size) {
3031 size *= 2;
3032 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3033 if (buf == NULL) {
3034 xmlGenericError(xmlGenericErrorContext,
3035 "realloc of %d byte failed\n", size);
3036 return(NULL);
3037 }
3038 }
3039 COPY_BUF(l,buf,len,c);
3040 NEXTL(l);
3041 c = CUR_CHAR(l);
3042 }
3043 if (c != '"') {
3044 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3046 ctxt->sax->error(ctxt->userData,
3047 "String not closed \"%.50s\"\n", buf);
3048 ctxt->wellFormed = 0;
3049 ctxt->disableSAX = 1;
3050 } else {
3051 NEXT;
3052 }
3053 } else if (RAW == '\''){
3054 NEXT;
3055 c = CUR;
3056 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3057 if (len + 1 >= size) {
3058 size *= 2;
3059 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3060 if (buf == NULL) {
3061 xmlGenericError(xmlGenericErrorContext,
3062 "realloc of %d byte failed\n", size);
3063 return(NULL);
3064 }
3065 }
3066 buf[len++] = c;
3067 NEXT;
3068 c = CUR;
3069 }
3070 if (RAW != '\'') {
3071 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3073 ctxt->sax->error(ctxt->userData,
3074 "String not closed \"%.50s\"\n", buf);
3075 ctxt->wellFormed = 0;
3076 ctxt->disableSAX = 1;
3077 } else {
3078 NEXT;
3079 }
3080 }
3081 return(buf);
3082#endif
3083}
3084
3085/**
3086 * xmlParseNamespace:
3087 * @ctxt: an XML parser context
3088 *
3089 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3090 *
3091 * This is what the older xml-name Working Draft specified, a bunch of
3092 * other stuff may still rely on it, so support is still here as
3093 * if it was declared on the root of the Tree:-(
3094 *
3095 * TODO: remove from library
3096 *
3097 * To be removed at next drop of binary compatibility
3098 */
3099
3100void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003101xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003102 static int deprecated = 0;
3103 if (!deprecated) {
3104 xmlGenericError(xmlGenericErrorContext,
3105 "xmlParseNamespace() deprecated function reached\n");
3106 deprecated = 1;
3107 }
3108
3109#if 0
3110 xmlChar *href = NULL;
3111 xmlChar *prefix = NULL;
3112 int garbage = 0;
3113
3114 /*
3115 * We just skipped "namespace" or "xml:namespace"
3116 */
3117 SKIP_BLANKS;
3118
3119xmlGenericError(xmlGenericErrorContext,
3120 "xmlParseNamespace: reached loop 5\n");
3121 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3122 /*
3123 * We can have "ns" or "prefix" attributes
3124 * Old encoding as 'href' or 'AS' attributes is still supported
3125 */
3126 if ((RAW == 'n') && (NXT(1) == 's')) {
3127 garbage = 0;
3128 SKIP(2);
3129 SKIP_BLANKS;
3130
3131 if (RAW != '=') continue;
3132 NEXT;
3133 SKIP_BLANKS;
3134
3135 href = xmlParseQuotedString(ctxt);
3136 SKIP_BLANKS;
3137 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3138 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3139 garbage = 0;
3140 SKIP(4);
3141 SKIP_BLANKS;
3142
3143 if (RAW != '=') continue;
3144 NEXT;
3145 SKIP_BLANKS;
3146
3147 href = xmlParseQuotedString(ctxt);
3148 SKIP_BLANKS;
3149 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3150 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3151 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3152 garbage = 0;
3153 SKIP(6);
3154 SKIP_BLANKS;
3155
3156 if (RAW != '=') continue;
3157 NEXT;
3158 SKIP_BLANKS;
3159
3160 prefix = xmlParseQuotedString(ctxt);
3161 SKIP_BLANKS;
3162 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3163 garbage = 0;
3164 SKIP(2);
3165 SKIP_BLANKS;
3166
3167 if (RAW != '=') continue;
3168 NEXT;
3169 SKIP_BLANKS;
3170
3171 prefix = xmlParseQuotedString(ctxt);
3172 SKIP_BLANKS;
3173 } else if ((RAW == '?') && (NXT(1) == '>')) {
3174 garbage = 0;
3175 NEXT;
3176 } else {
3177 /*
3178 * Found garbage when parsing the namespace
3179 */
3180 if (!garbage) {
3181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3182 ctxt->sax->error(ctxt->userData,
3183 "xmlParseNamespace found garbage\n");
3184 }
3185 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 NEXT;
3189 }
3190 }
3191
3192 MOVETO_ENDTAG(CUR_PTR);
3193 NEXT;
3194
3195 /*
3196 * Register the DTD.
3197 if (href != NULL)
3198 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3199 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3200 */
3201
3202 if (prefix != NULL) xmlFree(prefix);
3203 if (href != NULL) xmlFree(href);
3204#endif
3205}
3206
3207/**
3208 * xmlScanName:
3209 * @ctxt: an XML parser context
3210 *
3211 * Trickery: parse an XML name but without consuming the input flow
3212 * Needed for rollback cases. Used only when parsing entities references.
3213 *
3214 * TODO: seems deprecated now, only used in the default part of
3215 * xmlParserHandleReference
3216 *
3217 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3218 * CombiningChar | Extender
3219 *
3220 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3221 *
3222 * [6] Names ::= Name (S Name)*
3223 *
3224 * Returns the Name parsed or NULL
3225 */
3226
3227xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003228xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003229 static int deprecated = 0;
3230 if (!deprecated) {
3231 xmlGenericError(xmlGenericErrorContext,
3232 "xmlScanName() deprecated function reached\n");
3233 deprecated = 1;
3234 }
3235 return(NULL);
3236
3237#if 0
3238 xmlChar buf[XML_MAX_NAMELEN];
3239 int len = 0;
3240
3241 GROW;
3242 if (!IS_LETTER(RAW) && (RAW != '_') &&
3243 (RAW != ':')) {
3244 return(NULL);
3245 }
3246
3247
3248 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3249 (NXT(len) == '.') || (NXT(len) == '-') ||
3250 (NXT(len) == '_') || (NXT(len) == ':') ||
3251 (IS_COMBINING(NXT(len))) ||
3252 (IS_EXTENDER(NXT(len)))) {
3253 GROW;
3254 buf[len] = NXT(len);
3255 len++;
3256 if (len >= XML_MAX_NAMELEN) {
3257 xmlGenericError(xmlGenericErrorContext,
3258 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3259 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3260 (IS_DIGIT(NXT(len))) ||
3261 (NXT(len) == '.') || (NXT(len) == '-') ||
3262 (NXT(len) == '_') || (NXT(len) == ':') ||
3263 (IS_COMBINING(NXT(len))) ||
3264 (IS_EXTENDER(NXT(len))))
3265 len++;
3266 break;
3267 }
3268 }
3269 return(xmlStrndup(buf, len));
3270#endif
3271}
3272
3273/**
3274 * xmlParserHandleReference:
3275 * @ctxt: the parser context
3276 *
3277 * TODO: Remove, now deprecated ... the test is done directly in the
3278 * content parsing
3279 * routines.
3280 *
3281 * [67] Reference ::= EntityRef | CharRef
3282 *
3283 * [68] EntityRef ::= '&' Name ';'
3284 *
3285 * [ WFC: Entity Declared ]
3286 * the Name given in the entity reference must match that in an entity
3287 * declaration, except that well-formed documents need not declare any
3288 * of the following entities: amp, lt, gt, apos, quot.
3289 *
3290 * [ WFC: Parsed Entity ]
3291 * An entity reference must not contain the name of an unparsed entity
3292 *
3293 * [66] CharRef ::= '&#' [0-9]+ ';' |
3294 * '&#x' [0-9a-fA-F]+ ';'
3295 *
3296 * A PEReference may have been detectect in the current input stream
3297 * the handling is done accordingly to
3298 * http://www.w3.org/TR/REC-xml#entproc
3299 */
3300void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003301xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003302 static int deprecated = 0;
3303 if (!deprecated) {
3304 xmlGenericError(xmlGenericErrorContext,
3305 "xmlParserHandleReference() deprecated function reached\n");
3306 deprecated = 1;
3307 }
3308
3309#if 0
3310 xmlParserInputPtr input;
3311 xmlChar *name;
3312 xmlEntityPtr ent = NULL;
3313
3314 if (ctxt->token != 0) {
3315 return;
3316 }
3317 if (RAW != '&') return;
3318 GROW;
3319 if ((RAW == '&') && (NXT(1) == '#')) {
3320 switch(ctxt->instate) {
3321 case XML_PARSER_ENTITY_DECL:
3322 case XML_PARSER_PI:
3323 case XML_PARSER_CDATA_SECTION:
3324 case XML_PARSER_COMMENT:
3325 case XML_PARSER_SYSTEM_LITERAL:
3326 /* we just ignore it there */
3327 return;
3328 case XML_PARSER_START_TAG:
3329 return;
3330 case XML_PARSER_END_TAG:
3331 return;
3332 case XML_PARSER_EOF:
3333 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3335 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3336 ctxt->wellFormed = 0;
3337 ctxt->disableSAX = 1;
3338 return;
3339 case XML_PARSER_PROLOG:
3340 case XML_PARSER_START:
3341 case XML_PARSER_MISC:
3342 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 return;
3348 case XML_PARSER_EPILOG:
3349 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 return;
3355 case XML_PARSER_DTD:
3356 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3358 ctxt->sax->error(ctxt->userData,
3359 "CharRef are forbiden in DTDs!\n");
3360 ctxt->wellFormed = 0;
3361 ctxt->disableSAX = 1;
3362 return;
3363 case XML_PARSER_ENTITY_VALUE:
3364 /*
3365 * NOTE: in the case of entity values, we don't do the
3366 * substitution here since we need the literal
3367 * entity value to be able to save the internal
3368 * subset of the document.
3369 * This will be handled by xmlStringDecodeEntities
3370 */
3371 return;
3372 case XML_PARSER_CONTENT:
3373 return;
3374 case XML_PARSER_ATTRIBUTE_VALUE:
3375 /* ctxt->token = xmlParseCharRef(ctxt); */
3376 return;
3377 case XML_PARSER_IGNORE:
3378 return;
3379 }
3380 return;
3381 }
3382
3383 switch(ctxt->instate) {
3384 case XML_PARSER_CDATA_SECTION:
3385 return;
3386 case XML_PARSER_PI:
3387 case XML_PARSER_COMMENT:
3388 case XML_PARSER_SYSTEM_LITERAL:
3389 case XML_PARSER_CONTENT:
3390 return;
3391 case XML_PARSER_START_TAG:
3392 return;
3393 case XML_PARSER_END_TAG:
3394 return;
3395 case XML_PARSER_EOF:
3396 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3398 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3399 ctxt->wellFormed = 0;
3400 ctxt->disableSAX = 1;
3401 return;
3402 case XML_PARSER_PROLOG:
3403 case XML_PARSER_START:
3404 case XML_PARSER_MISC:
3405 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3407 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3408 ctxt->wellFormed = 0;
3409 ctxt->disableSAX = 1;
3410 return;
3411 case XML_PARSER_EPILOG:
3412 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3415 ctxt->wellFormed = 0;
3416 ctxt->disableSAX = 1;
3417 return;
3418 case XML_PARSER_ENTITY_VALUE:
3419 /*
3420 * NOTE: in the case of entity values, we don't do the
3421 * substitution here since we need the literal
3422 * entity value to be able to save the internal
3423 * subset of the document.
3424 * This will be handled by xmlStringDecodeEntities
3425 */
3426 return;
3427 case XML_PARSER_ATTRIBUTE_VALUE:
3428 /*
3429 * NOTE: in the case of attributes values, we don't do the
3430 * substitution here unless we are in a mode where
3431 * the parser is explicitely asked to substitute
3432 * entities. The SAX callback is called with values
3433 * without entity substitution.
3434 * This will then be handled by xmlStringDecodeEntities
3435 */
3436 return;
3437 case XML_PARSER_ENTITY_DECL:
3438 /*
3439 * we just ignore it there
3440 * the substitution will be done once the entity is referenced
3441 */
3442 return;
3443 case XML_PARSER_DTD:
3444 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3446 ctxt->sax->error(ctxt->userData,
3447 "Entity references are forbiden in DTDs!\n");
3448 ctxt->wellFormed = 0;
3449 ctxt->disableSAX = 1;
3450 return;
3451 case XML_PARSER_IGNORE:
3452 return;
3453 }
3454
3455/* TODO: this seems not reached anymore .... Verify ... */
3456xmlGenericError(xmlGenericErrorContext,
3457 "Reached deprecated section in xmlParserHandleReference()\n");
3458xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003459 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003460xmlGenericError(xmlGenericErrorContext,
3461 "indicating the version: %s, thanks !\n", xmlParserVersion);
3462 NEXT;
3463 name = xmlScanName(ctxt);
3464 if (name == NULL) {
3465 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3468 ctxt->wellFormed = 0;
3469 ctxt->disableSAX = 1;
3470 ctxt->token = '&';
3471 return;
3472 }
3473 if (NXT(xmlStrlen(name)) != ';') {
3474 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3476 ctxt->sax->error(ctxt->userData,
3477 "Entity reference: ';' expected\n");
3478 ctxt->wellFormed = 0;
3479 ctxt->disableSAX = 1;
3480 ctxt->token = '&';
3481 xmlFree(name);
3482 return;
3483 }
3484 SKIP(xmlStrlen(name) + 1);
3485 if (ctxt->sax != NULL) {
3486 if (ctxt->sax->getEntity != NULL)
3487 ent = ctxt->sax->getEntity(ctxt->userData, name);
3488 }
3489
3490 /*
3491 * [ WFC: Entity Declared ]
3492 * the Name given in the entity reference must match that in an entity
3493 * declaration, except that well-formed documents need not declare any
3494 * of the following entities: amp, lt, gt, apos, quot.
3495 */
3496 if (ent == NULL)
3497 ent = xmlGetPredefinedEntity(name);
3498 if (ent == NULL) {
3499 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3501 ctxt->sax->error(ctxt->userData,
3502 "Entity reference: entity %s not declared\n",
3503 name);
3504 ctxt->wellFormed = 0;
3505 ctxt->disableSAX = 1;
3506 xmlFree(name);
3507 return;
3508 }
3509
3510 /*
3511 * [ WFC: Parsed Entity ]
3512 * An entity reference must not contain the name of an unparsed entity
3513 */
3514 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3515 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3517 ctxt->sax->error(ctxt->userData,
3518 "Entity reference to unparsed entity %s\n", name);
3519 ctxt->wellFormed = 0;
3520 ctxt->disableSAX = 1;
3521 }
3522
3523 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3524 ctxt->token = ent->content[0];
3525 xmlFree(name);
3526 return;
3527 }
3528 input = xmlNewEntityInputStream(ctxt, ent);
3529 xmlPushInput(ctxt, input);
3530 xmlFree(name);
3531#endif
3532 return;
3533}
3534
3535/**
3536 * xmlHandleEntity:
3537 * @ctxt: an XML parser context
3538 * @entity: an XML entity pointer.
3539 *
3540 * Default handling of defined entities, when should we define a new input
3541 * stream ? When do we just handle that as a set of chars ?
3542 *
3543 * OBSOLETE: to be removed at some point.
3544 */
3545
3546void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003547xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003548 static int deprecated = 0;
3549 if (!deprecated) {
3550 xmlGenericError(xmlGenericErrorContext,
3551 "xmlHandleEntity() deprecated function reached\n");
3552 deprecated = 1;
3553 }
3554
3555#if 0
3556 int len;
3557 xmlParserInputPtr input;
3558
3559 if (entity->content == NULL) {
3560 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3562 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3563 entity->name);
3564 ctxt->wellFormed = 0;
3565 ctxt->disableSAX = 1;
3566 return;
3567 }
3568 len = xmlStrlen(entity->content);
3569 if (len <= 2) goto handle_as_char;
3570
3571 /*
3572 * Redefine its content as an input stream.
3573 */
3574 input = xmlNewEntityInputStream(ctxt, entity);
3575 xmlPushInput(ctxt, input);
3576 return;
3577
3578handle_as_char:
3579 /*
3580 * Just handle the content as a set of chars.
3581 */
3582 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3583 (ctxt->sax->characters != NULL))
3584 ctxt->sax->characters(ctxt->userData, entity->content, len);
3585#endif
3586}
3587
3588/**
3589 * xmlNewGlobalNs:
3590 * @doc: the document carrying the namespace
3591 * @href: the URI associated
3592 * @prefix: the prefix for the namespace
3593 *
3594 * Creation of a Namespace, the old way using PI and without scoping
3595 * DEPRECATED !!!
3596 * It now create a namespace on the root element of the document if found.
3597 * Returns NULL this functionnality had been removed
3598 */
3599xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003600xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3601 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003602 static int deprecated = 0;
3603 if (!deprecated) {
3604 xmlGenericError(xmlGenericErrorContext,
3605 "xmlNewGlobalNs() deprecated function reached\n");
3606 deprecated = 1;
3607 }
3608 return(NULL);
3609#if 0
3610 xmlNodePtr root;
3611
3612 xmlNsPtr cur;
3613
3614 root = xmlDocGetRootElement(doc);
3615 if (root != NULL)
3616 return(xmlNewNs(root, href, prefix));
3617
3618 /*
3619 * if there is no root element yet, create an old Namespace type
3620 * and it will be moved to the root at save time.
3621 */
3622 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3623 if (cur == NULL) {
3624 xmlGenericError(xmlGenericErrorContext,
3625 "xmlNewGlobalNs : malloc failed\n");
3626 return(NULL);
3627 }
3628 memset(cur, 0, sizeof(xmlNs));
3629 cur->type = XML_GLOBAL_NAMESPACE;
3630
3631 if (href != NULL)
3632 cur->href = xmlStrdup(href);
3633 if (prefix != NULL)
3634 cur->prefix = xmlStrdup(prefix);
3635
3636 /*
3637 * Add it at the end to preserve parsing order ...
3638 */
3639 if (doc != NULL) {
3640 if (doc->oldNs == NULL) {
3641 doc->oldNs = cur;
3642 } else {
3643 xmlNsPtr prev = doc->oldNs;
3644
3645 while (prev->next != NULL) prev = prev->next;
3646 prev->next = cur;
3647 }
3648 }
3649
3650 return(NULL);
3651#endif
3652}
3653
3654/**
3655 * xmlUpgradeOldNs:
3656 * @doc: a document pointer
3657 *
3658 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3659 * DEPRECATED
3660 */
3661void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003662xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003663 static int deprecated = 0;
3664 if (!deprecated) {
3665 xmlGenericError(xmlGenericErrorContext,
3666 "xmlNewGlobalNs() deprecated function reached\n");
3667 deprecated = 1;
3668 }
3669#if 0
3670 xmlNsPtr cur;
3671
3672 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3673 if (doc->children == NULL) {
3674#ifdef DEBUG_TREE
3675 xmlGenericError(xmlGenericErrorContext,
3676 "xmlUpgradeOldNs: failed no root !\n");
3677#endif
3678 return;
3679 }
3680
3681 cur = doc->oldNs;
3682 while (cur->next != NULL) {
3683 cur->type = XML_LOCAL_NAMESPACE;
3684 cur = cur->next;
3685 }
3686 cur->type = XML_LOCAL_NAMESPACE;
3687 cur->next = doc->children->nsDef;
3688 doc->children->nsDef = doc->oldNs;
3689 doc->oldNs = NULL;
3690#endif
3691}
3692