/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000049#ifdef LIBXML_CATALOG_ENABLED
50#include <libxml/catalog.h>
51#endif
Owen Taylor3473f882001-02-23 17:55:21 +000052
Daniel Veillard56a4cb82001-03-24 17:00:36 +000053void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000054
/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
#ifdef VMS
/*
 * On VMS the two variables below are defined under shorter names and the
 * long spellings are kept usable through macros.
 * NOTE(review): presumably an identifier length limit on VMS - confirm.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlLineNumbersDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
72
/************************************************************************
 *                                                                      *
 *              Version and Features handling                           *
 *                                                                      *
 ************************************************************************/
78const char *xmlParserVersion = LIBXML_VERSION_STRING;
79
Daniel Veillard5e2dace2001-07-18 19:30:27 +000080/**
Owen Taylor3473f882001-02-23 17:55:21 +000081 * xmlCheckVersion:
82 * @version: the include version number
83 *
84 * check the compiled lib version against the include one.
85 * This can warn or immediately kill the application
86 */
87void
88xmlCheckVersion(int version) {
89 int myversion = (int) LIBXML_VERSION;
90
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000091 xmlInitMemory();
92
Owen Taylor3473f882001-02-23 17:55:21 +000093 if ((myversion / 10000) != (version / 10000)) {
94 xmlGenericError(xmlGenericErrorContext,
95 "Fatal: program compiled against libxml %d using libxml %d\n",
96 (version / 10000), (myversion / 10000));
97 exit(1);
98 }
99 if ((myversion / 100) < (version / 100)) {
100 xmlGenericError(xmlGenericErrorContext,
101 "Warning: program compiled against libxml %d using older %d\n",
102 (version / 100), (myversion / 100));
103 }
104}
105
106
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() reports them.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * If @len or @result is NULL nothing is copied and the total number of
 * features is returned, which lets a caller size its array first.
 *
 * Returns -1 in case of error (*@len negative or >= 1000), or the total
 *         number of features; *@len is updated with the number of
 *         strings copied. The strings must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    int total, i;

    total = (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    if ((len == NULL) || (result == NULL))
        return(total);
    if ((*len < 0) || (*len >= 1000))
        return(-1);
    /* never copy more entries than the table holds */
    if (*len > total)
        *len = total;
    for (i = 0; i < *len; i++)
        result[i] = xmlFeaturesList[i];
    return(total);
}
178
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
Owen Taylor3473f882001-02-23 17:55:21 +0000180 * xmlGetFeature:
181 * @ctxt: an XML/HTML parser context
182 * @name: the feature name
183 * @result: location to store the result
184 *
185 * Read the current value of one feature of this parser instance
186 *
187 * Returns -1 in case or error, 0 otherwise
188 */
189int
190xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
191 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
192 return(-1);
193
194 if (!strcmp(name, "validate")) {
195 *((int *) result) = ctxt->validate;
196 } else if (!strcmp(name, "keep blanks")) {
197 *((int *) result) = ctxt->keepBlanks;
198 } else if (!strcmp(name, "disable SAX")) {
199 *((int *) result) = ctxt->disableSAX;
200 } else if (!strcmp(name, "fetch external entities")) {
201 *((int *) result) = ctxt->loadsubset;
202 } else if (!strcmp(name, "substitute entities")) {
203 *((int *) result) = ctxt->replaceEntities;
204 } else if (!strcmp(name, "gather line info")) {
205 *((int *) result) = ctxt->record_info;
206 } else if (!strcmp(name, "user data")) {
207 *((void **)result) = ctxt->userData;
208 } else if (!strcmp(name, "is html")) {
209 *((int *) result) = ctxt->html;
210 } else if (!strcmp(name, "is standalone")) {
211 *((int *) result) = ctxt->standalone;
212 } else if (!strcmp(name, "document")) {
213 *((xmlDocPtr *) result) = ctxt->myDoc;
214 } else if (!strcmp(name, "is well formed")) {
215 *((int *) result) = ctxt->wellFormed;
216 } else if (!strcmp(name, "is valid")) {
217 *((int *) result) = ctxt->valid;
218 } else if (!strcmp(name, "SAX block")) {
219 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
220 } else if (!strcmp(name, "SAX function internalSubset")) {
221 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
222 } else if (!strcmp(name, "SAX function isStandalone")) {
223 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
224 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
225 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
226 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
227 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
228 } else if (!strcmp(name, "SAX function resolveEntity")) {
229 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
230 } else if (!strcmp(name, "SAX function getEntity")) {
231 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
232 } else if (!strcmp(name, "SAX function entityDecl")) {
233 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
234 } else if (!strcmp(name, "SAX function notationDecl")) {
235 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
236 } else if (!strcmp(name, "SAX function attributeDecl")) {
237 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
238 } else if (!strcmp(name, "SAX function elementDecl")) {
239 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
240 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
241 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
242 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
243 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
244 } else if (!strcmp(name, "SAX function startDocument")) {
245 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
246 } else if (!strcmp(name, "SAX function endDocument")) {
247 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
248 } else if (!strcmp(name, "SAX function startElement")) {
249 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
250 } else if (!strcmp(name, "SAX function endElement")) {
251 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
252 } else if (!strcmp(name, "SAX function reference")) {
253 *((referenceSAXFunc *) result) = ctxt->sax->reference;
254 } else if (!strcmp(name, "SAX function characters")) {
255 *((charactersSAXFunc *) result) = ctxt->sax->characters;
256 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
257 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
258 } else if (!strcmp(name, "SAX function processingInstruction")) {
259 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
260 } else if (!strcmp(name, "SAX function comment")) {
261 *((commentSAXFunc *) result) = ctxt->sax->comment;
262 } else if (!strcmp(name, "SAX function warning")) {
263 *((warningSAXFunc *) result) = ctxt->sax->warning;
264 } else if (!strcmp(name, "SAX function error")) {
265 *((errorSAXFunc *) result) = ctxt->sax->error;
266 } else if (!strcmp(name, "SAX function fatalError")) {
267 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
268 } else if (!strcmp(name, "SAX function getParameterEntity")) {
269 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
270 } else if (!strcmp(name, "SAX function cdataBlock")) {
271 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
272 } else if (!strcmp(name, "SAX function externalSubset")) {
273 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
274 } else {
275 return(-1);
276 }
277 return(0);
278}
279
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000280/**
Owen Taylor3473f882001-02-23 17:55:21 +0000281 * xmlSetFeature:
282 * @ctxt: an XML/HTML parser context
283 * @name: the feature name
284 * @value: pointer to the location of the new value
285 *
286 * Change the current value of one feature of this parser instance
287 *
288 * Returns -1 in case or error, 0 otherwise
289 */
290int
291xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
292 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
293 return(-1);
294
295 if (!strcmp(name, "validate")) {
296 int newvalidate = *((int *) value);
297 if ((!ctxt->validate) && (newvalidate != 0)) {
298 if (ctxt->vctxt.warning == NULL)
299 ctxt->vctxt.warning = xmlParserValidityWarning;
300 if (ctxt->vctxt.error == NULL)
301 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000302 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000303 }
304 ctxt->validate = newvalidate;
305 } else if (!strcmp(name, "keep blanks")) {
306 ctxt->keepBlanks = *((int *) value);
307 } else if (!strcmp(name, "disable SAX")) {
308 ctxt->disableSAX = *((int *) value);
309 } else if (!strcmp(name, "fetch external entities")) {
310 ctxt->loadsubset = *((int *) value);
311 } else if (!strcmp(name, "substitute entities")) {
312 ctxt->replaceEntities = *((int *) value);
313 } else if (!strcmp(name, "gather line info")) {
314 ctxt->record_info = *((int *) value);
315 } else if (!strcmp(name, "user data")) {
316 ctxt->userData = *((void **)value);
317 } else if (!strcmp(name, "is html")) {
318 ctxt->html = *((int *) value);
319 } else if (!strcmp(name, "is standalone")) {
320 ctxt->standalone = *((int *) value);
321 } else if (!strcmp(name, "document")) {
322 ctxt->myDoc = *((xmlDocPtr *) value);
323 } else if (!strcmp(name, "is well formed")) {
324 ctxt->wellFormed = *((int *) value);
325 } else if (!strcmp(name, "is valid")) {
326 ctxt->valid = *((int *) value);
327 } else if (!strcmp(name, "SAX block")) {
328 ctxt->sax = *((xmlSAXHandlerPtr *) value);
329 } else if (!strcmp(name, "SAX function internalSubset")) {
330 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function isStandalone")) {
332 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
334 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
336 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function resolveEntity")) {
338 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
339 } else if (!strcmp(name, "SAX function getEntity")) {
340 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
341 } else if (!strcmp(name, "SAX function entityDecl")) {
342 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function notationDecl")) {
344 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function attributeDecl")) {
346 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function elementDecl")) {
348 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
350 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
352 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function startDocument")) {
354 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function endDocument")) {
356 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function startElement")) {
358 ctxt->sax->startElement = *((startElementSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function endElement")) {
360 ctxt->sax->endElement = *((endElementSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function reference")) {
362 ctxt->sax->reference = *((referenceSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function characters")) {
364 ctxt->sax->characters = *((charactersSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
366 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function processingInstruction")) {
368 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
369 } else if (!strcmp(name, "SAX function comment")) {
370 ctxt->sax->comment = *((commentSAXFunc *) value);
371 } else if (!strcmp(name, "SAX function warning")) {
372 ctxt->sax->warning = *((warningSAXFunc *) value);
373 } else if (!strcmp(name, "SAX function error")) {
374 ctxt->sax->error = *((errorSAXFunc *) value);
375 } else if (!strcmp(name, "SAX function fatalError")) {
376 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
377 } else if (!strcmp(name, "SAX function getParameterEntity")) {
378 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
379 } else if (!strcmp(name, "SAX function cdataBlock")) {
380 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
381 } else if (!strcmp(name, "SAX function externalSubset")) {
382 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
383 } else {
384 return(-1);
385 }
386 return(0);
387}
388
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
394
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* the only control characters allowed: tab, line feed, carriage return */
    if ((c == 0x09) || (c == 0x0A) || (c == 0x0D))
        return(1);
    if ((c >= 0x20) && (c <= 0xD7FF))
        return(1);
    /* 0xD800-0xDFFF (surrogates) and 0xFFFE/0xFFFF are excluded */
    if ((c >= 0xE000) && (c <= 0xFFFD))
        return(1);
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
415
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
430
/*
 * Lookup table for the code points 0x00 - 0xFF: an entry is 1 when the
 * code point is a BaseChar, 0 otherwise.
 */
static const unsigned char xmlBaseArray[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered from xmlBaseArray; the remaining
 * ranges are split at 0x0905 and 0x10A0 so that only the relevant part of
 * the list is evaluated. Negative values are rejected before the table is
 * indexed.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* a negative value is not a code point and must not index the table */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    if (c < 0x0905)
        return(
            (((c) >= 0x0100) && ((c) <= 0x0131)) ||
            (((c) >= 0x0134) && ((c) <= 0x013E)) ||
            (((c) >= 0x0141) && ((c) <= 0x0148)) ||
            (((c) >= 0x014A) && ((c) <= 0x017E)) ||
            (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
            (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
            (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
            (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
            (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
            (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
            ((c) == 0x0386) ||
            (((c) >= 0x0388) && ((c) <= 0x038A)) ||
            ((c) == 0x038C) ||
            (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
            (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
            (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
            ((c) == 0x03DA) ||
            ((c) == 0x03DC) ||
            ((c) == 0x03DE) ||
            ((c) == 0x03E0) ||
            (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
            (((c) >= 0x0401) && ((c) <= 0x040C)) ||
            (((c) >= 0x040E) && ((c) <= 0x044F)) ||
            (((c) >= 0x0451) && ((c) <= 0x045C)) ||
            (((c) >= 0x045E) && ((c) <= 0x0481)) ||
            (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
            (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
            (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
            (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
            (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
            (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
            (((c) >= 0x0531) && ((c) <= 0x0556)) ||
            ((c) == 0x0559) ||
            (((c) >= 0x0561) && ((c) <= 0x0586)) ||
            (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
            (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
            (((c) >= 0x0621) && ((c) <= 0x063A)) ||
            (((c) >= 0x0641) && ((c) <= 0x064A)) ||
            (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
            (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
            (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
            (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
            ((c) == 0x06D5) ||
            (((c) >= 0x06E5) && ((c) <= 0x06E6)));

    if (c < 0x10A0)
        return(
            (((c) >= 0x0905) && ((c) <= 0x0939)) ||
            ((c) == 0x093D) ||
            (((c) >= 0x0958) && ((c) <= 0x0961)) ||
            (((c) >= 0x0985) && ((c) <= 0x098C)) ||
            (((c) >= 0x098F) && ((c) <= 0x0990)) ||
            (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
            (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
            ((c) == 0x09B2) ||
            (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
            (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
            (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
            (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
            (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
            (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
            (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
            (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
            (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
            (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
            (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
            (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
            ((c) == 0x0A5E) ||
            (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
            (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
            ((c) == 0x0A8D) ||
            (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
            (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
            (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
            (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
            (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
            ((c) == 0x0ABD) ||
            ((c) == 0x0AE0) ||
            (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
            (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
            (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
            (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
            (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
            (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
            ((c) == 0x0B3D) ||
            (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
            (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
            (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
            (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
            (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
            (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
            ((c) == 0x0B9C) ||
            (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
            (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
            (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
            (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
            (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
            (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
            (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
            (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
            (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
            (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
            (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
            (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
            (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
            (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
            (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
            (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
            ((c) == 0x0CDE) ||
            (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
            (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
            (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
            (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
            (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
            (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
            (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
            ((c) == 0x0E30) ||
            (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
            (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
            (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
            ((c) == 0x0E84) ||
            (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
            ((c) == 0x0E8A) ||
            ((c) == 0x0E8D) ||
            (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
            (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
            (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
            ((c) == 0x0EA5) ||
            ((c) == 0x0EA7) ||
            (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
            (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
            ((c) == 0x0EB0) ||
            (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
            ((c) == 0x0EBD) ||
            (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
            (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
            (((c) >= 0x0F49) && ((c) <= 0x0F69)));

    return(
        (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
        (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
        ((c) == 0x1100) ||
        (((c) >= 0x1102) && ((c) <= 0x1103)) ||
        (((c) >= 0x1105) && ((c) <= 0x1107)) ||
        ((c) == 0x1109) ||
        (((c) >= 0x110B) && ((c) <= 0x110C)) ||
        (((c) >= 0x110E) && ((c) <= 0x1112)) ||
        ((c) == 0x113C) ||
        ((c) == 0x113E) ||
        ((c) == 0x1140) ||
        ((c) == 0x114C) ||
        ((c) == 0x114E) ||
        ((c) == 0x1150) ||
        (((c) >= 0x1154) && ((c) <= 0x1155)) ||
        ((c) == 0x1159) ||
        (((c) >= 0x115F) && ((c) <= 0x1161)) ||
        ((c) == 0x1163) ||
        ((c) == 0x1165) ||
        ((c) == 0x1167) ||
        ((c) == 0x1169) ||
        (((c) >= 0x116D) && ((c) <= 0x116E)) ||
        (((c) >= 0x1172) && ((c) <= 0x1173)) ||
        ((c) == 0x1175) ||
        ((c) == 0x119E) ||
        ((c) == 0x11A8) ||
        ((c) == 0x11AB) ||
        (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
        (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
        ((c) == 0x11BA) ||
        (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
        ((c) == 0x11EB) ||
        ((c) == 0x11F0) ||
        ((c) == 0x11F9) ||
        (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
        (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
        (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
        (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
        (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
        (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
        (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
        ((c) == 0x1F59) ||
        ((c) == 0x1F5B) ||
        ((c) == 0x1F5D) ||
        (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
        (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
        (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
        ((c) == 0x1FBE) ||
        (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
        (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
        (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
        (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
        (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
        (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
        (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
        ((c) == 0x2126) ||
        (((c) >= 0x212A) && ((c) <= 0x212B)) ||
        ((c) == 0x212E) ||
        (((c) >= 0x2180) && ((c) <= 0x2182)) ||
        (((c) >= 0x3041) && ((c) <= 0x3094)) ||
        (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
        (((c) >= 0x3105) && ((c) <= 0x312C)) ||
        (((c) >= 0xAC00) && ((c) <= 0xD7A3)));
}
669
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* below 0x0660 only the ASCII digits qualify */
    if (c < 0x0660)
        return((c >= 0x0030) && (c <= 0x0039));

    return(
        (((c) >= 0x0660) && ((c) <= 0x0669)) ||
        (((c) >= 0x06F0) && ((c) <= 0x06F9)) ||
        (((c) >= 0x0966) && ((c) <= 0x096F)) ||
        (((c) >= 0x09E6) && ((c) <= 0x09EF)) ||
        (((c) >= 0x0A66) && ((c) <= 0x0A6F)) ||
        (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) ||
        (((c) >= 0x0B66) && ((c) <= 0x0B6F)) ||
        (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) ||
        (((c) >= 0x0C66) && ((c) <= 0x0C6F)) ||
        (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) ||
        (((c) >= 0x0D66) && ((c) <= 0x0D6F)) ||
        (((c) >= 0x0E50) && ((c) <= 0x0E59)) ||
        (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) ||
        (((c) >= 0x0F20) && ((c) <= 0x0F29)));
}
699
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The list is split at 0x0901, 0x0A02 and 0x0E31 so that only the part
 * which can contain @c is evaluated.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* no combining character exists below 0x0300 */
    if (c < 0x0300)
        return(0);

    if (c < 0x0901)
        return(
            (((c) >= 0x0300) && ((c) <= 0x0345)) ||
            (((c) >= 0x0360) && ((c) <= 0x0361)) ||
            (((c) >= 0x0483) && ((c) <= 0x0486)) ||
            (((c) >= 0x0591) && ((c) <= 0x05A1)) ||
            (((c) >= 0x05A3) && ((c) <= 0x05B9)) ||
            (((c) >= 0x05BB) && ((c) <= 0x05BD)) ||
            ((c) == 0x05BF) ||
            (((c) >= 0x05C1) && ((c) <= 0x05C2)) ||
            ((c) == 0x05C4) ||
            (((c) >= 0x064B) && ((c) <= 0x0652)) ||
            ((c) == 0x0670) ||
            (((c) >= 0x06D6) && ((c) <= 0x06DC)) ||
            (((c) >= 0x06DD) && ((c) <= 0x06DF)) ||
            (((c) >= 0x06E0) && ((c) <= 0x06E4)) ||
            (((c) >= 0x06E7) && ((c) <= 0x06E8)) ||
            (((c) >= 0x06EA) && ((c) <= 0x06ED)));

    if (c < 0x0A02)
        return(
            (((c) >= 0x0901) && ((c) <= 0x0903)) ||
            ((c) == 0x093C) ||
            (((c) >= 0x093E) && ((c) <= 0x094C)) ||
            ((c) == 0x094D) ||
            (((c) >= 0x0951) && ((c) <= 0x0954)) ||
            (((c) >= 0x0962) && ((c) <= 0x0963)) ||
            (((c) >= 0x0981) && ((c) <= 0x0983)) ||
            ((c) == 0x09BC) ||
            ((c) == 0x09BE) ||
            ((c) == 0x09BF) ||
            (((c) >= 0x09C0) && ((c) <= 0x09C4)) ||
            (((c) >= 0x09C7) && ((c) <= 0x09C8)) ||
            (((c) >= 0x09CB) && ((c) <= 0x09CD)) ||
            ((c) == 0x09D7) ||
            (((c) >= 0x09E2) && ((c) <= 0x09E3)));

    if (c < 0x0E31)
        return(
            ((c) == 0x0A02) ||
            ((c) == 0x0A3C) ||
            ((c) == 0x0A3E) ||
            ((c) == 0x0A3F) ||
            (((c) >= 0x0A40) && ((c) <= 0x0A42)) ||
            (((c) >= 0x0A47) && ((c) <= 0x0A48)) ||
            (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) ||
            (((c) >= 0x0A70) && ((c) <= 0x0A71)) ||
            (((c) >= 0x0A81) && ((c) <= 0x0A83)) ||
            ((c) == 0x0ABC) ||
            (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) ||
            (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) ||
            (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) ||
            (((c) >= 0x0B01) && ((c) <= 0x0B03)) ||
            ((c) == 0x0B3C) ||
            (((c) >= 0x0B3E) && ((c) <= 0x0B43)) ||
            (((c) >= 0x0B47) && ((c) <= 0x0B48)) ||
            (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) ||
            (((c) >= 0x0B56) && ((c) <= 0x0B57)) ||
            (((c) >= 0x0B82) && ((c) <= 0x0B83)) ||
            (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) ||
            (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) ||
            (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) ||
            ((c) == 0x0BD7) ||
            (((c) >= 0x0C01) && ((c) <= 0x0C03)) ||
            (((c) >= 0x0C3E) && ((c) <= 0x0C44)) ||
            (((c) >= 0x0C46) && ((c) <= 0x0C48)) ||
            (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) ||
            (((c) >= 0x0C55) && ((c) <= 0x0C56)) ||
            (((c) >= 0x0C82) && ((c) <= 0x0C83)) ||
            (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) ||
            (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) ||
            (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) ||
            (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) ||
            (((c) >= 0x0D02) && ((c) <= 0x0D03)) ||
            (((c) >= 0x0D3E) && ((c) <= 0x0D43)) ||
            (((c) >= 0x0D46) && ((c) <= 0x0D48)) ||
            (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) ||
            ((c) == 0x0D57));

    return(
        ((c) == 0x0E31) ||
        (((c) >= 0x0E34) && ((c) <= 0x0E3A)) ||
        (((c) >= 0x0E47) && ((c) <= 0x0E4E)) ||
        ((c) == 0x0EB1) ||
        (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) ||
        (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) ||
        (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) ||
        (((c) >= 0x0F18) && ((c) <= 0x0F19)) ||
        ((c) == 0x0F35) ||
        ((c) == 0x0F37) ||
        ((c) == 0x0F39) ||
        ((c) == 0x0F3E) ||
        ((c) == 0x0F3F) ||
        (((c) >= 0x0F71) && ((c) <= 0x0F84)) ||
        (((c) >= 0x0F86) && ((c) <= 0x0F8B)) ||
        (((c) >= 0x0F90) && ((c) <= 0x0F95)) ||
        ((c) == 0x0F97) ||
        (((c) >= 0x0F99) && ((c) <= 0x0FAD)) ||
        (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) ||
        ((c) == 0x0FB9) ||
        (((c) >= 0x20D0) && ((c) <= 0x20DC)) ||
        ((c) == 0x20E1) ||
        (((c) >= 0x302A) && ((c) <= 0x302F)) ||
        ((c) == 0x3099) ||
        ((c) == 0x309A));
}
812
/**
 * xmlIsExtender:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE]: every member of the range, 0x30FD included */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
837
/**
 * xmlIsIdeographic:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three members of the production are accepted. Code points of
 * the compatibility area (#xF900 and above) are not part of it and are
 * therefore rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick rejection: no ideographic character lives below 0x0100 */
    return(((c) < 0x0100) ? 0 :
     (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||
      ((c) == 0x3007));
}
855
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
869
/**
 * xmlIsPubidChar:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* #x20 | #xD | #xA */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
892
893/************************************************************************
894 * *
895 * Input handling functions for progressive parsing *
896 * *
897 ************************************************************************/
898
899/* #define DEBUG_INPUT */
900/* #define DEBUG_STACK */
901/* #define DEBUG_PUSH */
902
903
904/* we need to keep enough input to show errors in context */
905#define LINE_LEN 80
906
907#ifdef DEBUG_INPUT
908#define CHECK_BUFFER(in) check_buffer(in)
909
910void check_buffer(xmlParserInputPtr in) {
911 if (in->base != in->buf->buffer->content) {
912 xmlGenericError(xmlGenericErrorContext,
913 "xmlParserInput: base mismatch problem\n");
914 }
915 if (in->cur < in->base) {
916 xmlGenericError(xmlGenericErrorContext,
917 "xmlParserInput: cur < base problem\n");
918 }
919 if (in->cur > in->base + in->buf->buffer->use) {
920 xmlGenericError(xmlGenericErrorContext,
921 "xmlParserInput: cur > base + use problem\n");
922 }
923 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
924 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
925 in->buf->buffer->use, in->buf->buffer->size);
926}
927
928#else
929#define CHECK_BUFFER(in)
930#endif
931
932
933/**
934 * xmlParserInputRead:
935 * @in: an XML parser input
936 * @len: an indicative size for the lookahead
937 *
938 * This function refresh the input for the parser. It doesn't try to
939 * preserve pointers to the input buffer, and discard already read data
940 *
941 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
942 * end of this entity
943 */
944int
945xmlParserInputRead(xmlParserInputPtr in, int len) {
946 int ret;
947 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000948 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000949
950#ifdef DEBUG_INPUT
951 xmlGenericError(xmlGenericErrorContext, "Read\n");
952#endif
953 if (in->buf == NULL) return(-1);
954 if (in->base == NULL) return(-1);
955 if (in->cur == NULL) return(-1);
956 if (in->buf->buffer == NULL) return(-1);
957 if (in->buf->readcallback == NULL) return(-1);
958
959 CHECK_BUFFER(in);
960
961 used = in->cur - in->buf->buffer->content;
962 ret = xmlBufferShrink(in->buf->buffer, used);
963 if (ret > 0) {
964 in->cur -= ret;
965 in->consumed += ret;
966 }
967 ret = xmlParserInputBufferRead(in->buf, len);
968 if (in->base != in->buf->buffer->content) {
969 /*
970 * the buffer has been realloced
971 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000972 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000973 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000974 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000975 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000976 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000977
978 CHECK_BUFFER(in);
979
980 return(ret);
981}
982
983/**
984 * xmlParserInputGrow:
985 * @in: an XML parser input
986 * @len: an indicative size for the lookahead
987 *
988 * This function increase the input for the parser. It tries to
989 * preserve pointers to the input buffer, and keep already read data
990 *
991 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
992 * end of this entity
993 */
994int
995xmlParserInputGrow(xmlParserInputPtr in, int len) {
996 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000997 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000998
999#ifdef DEBUG_INPUT
1000 xmlGenericError(xmlGenericErrorContext, "Grow\n");
1001#endif
1002 if (in->buf == NULL) return(-1);
1003 if (in->base == NULL) return(-1);
1004 if (in->cur == NULL) return(-1);
1005 if (in->buf->buffer == NULL) return(-1);
1006
1007 CHECK_BUFFER(in);
1008
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001009 indx = in->cur - in->base;
1010 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001011
1012 CHECK_BUFFER(in);
1013
1014 return(0);
1015 }
1016 if (in->buf->readcallback != NULL)
1017 ret = xmlParserInputBufferGrow(in->buf, len);
1018 else
1019 return(0);
1020
1021 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001022 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001023 * block, but we use it really as an integer to do some
1024 * pointer arithmetic. Insure will raise it as a bug but in
1025 * that specific case, that's not !
1026 */
1027 if (in->base != in->buf->buffer->content) {
1028 /*
1029 * the buffer has been realloced
1030 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001031 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001032 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001033 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001034 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001035 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001036
1037 CHECK_BUFFER(in);
1038
1039 return(ret);
1040}
1041
1042/**
1043 * xmlParserInputShrink:
1044 * @in: an XML parser input
1045 *
1046 * This function removes used input for the parser.
1047 */
1048void
1049xmlParserInputShrink(xmlParserInputPtr in) {
1050 int used;
1051 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001052 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001053
1054#ifdef DEBUG_INPUT
1055 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1056#endif
1057 if (in->buf == NULL) return;
1058 if (in->base == NULL) return;
1059 if (in->cur == NULL) return;
1060 if (in->buf->buffer == NULL) return;
1061
1062 CHECK_BUFFER(in);
1063
1064 used = in->cur - in->buf->buffer->content;
1065 /*
1066 * Do not shrink on large buffers whose only a tiny fraction
1067 * was consumned
1068 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001069 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return;
1071 if (used > INPUT_CHUNK) {
1072 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1073 if (ret > 0) {
1074 in->cur -= ret;
1075 in->consumed += ret;
1076 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001077 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001078 }
1079
1080 CHECK_BUFFER(in);
1081
1082 if (in->buf->buffer->use > INPUT_CHUNK) {
1083 return;
1084 }
1085 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1086 if (in->base != in->buf->buffer->content) {
1087 /*
1088 * the buffer has been realloced
1089 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001090 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001091 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001092 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001093 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001094 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001095
1096 CHECK_BUFFER(in);
1097}
1098
1099/************************************************************************
1100 * *
1101 * UTF8 character input and related functions *
1102 * *
1103 ************************************************************************/
1104
1105/**
1106 * xmlNextChar:
1107 * @ctxt: the XML parser context
1108 *
1109 * Skip to the next char input char.
1110 */
1111
1112void
1113xmlNextChar(xmlParserCtxtPtr ctxt) {
1114 if (ctxt->instate == XML_PARSER_EOF)
1115 return;
1116
1117 /*
1118 * 2.11 End-of-Line Handling
1119 * the literal two-character sequence "#xD#xA" or a standalone
1120 * literal #xD, an XML processor must pass to the application
1121 * the single character #xA.
1122 */
1123 if (ctxt->token != 0) ctxt->token = 0;
1124 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1125 if ((*ctxt->input->cur == 0) &&
1126 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1127 (ctxt->instate != XML_PARSER_COMMENT)) {
1128 /*
1129 * If we are at the end of the current entity and
1130 * the context allows it, we pop consumed entities
1131 * automatically.
1132 * the auto closing should be blocked in other cases
1133 */
1134 xmlPopInput(ctxt);
1135 } else {
1136 if (*(ctxt->input->cur) == '\n') {
1137 ctxt->input->line++; ctxt->input->col = 1;
1138 } else ctxt->input->col++;
1139 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1140 /*
1141 * We are supposed to handle UTF8, check it's valid
1142 * From rfc2044: encoding of the Unicode values on UTF-8:
1143 *
1144 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1145 * 0000 0000-0000 007F 0xxxxxxx
1146 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1147 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1148 *
1149 * Check for the 0x110000 limit too
1150 */
1151 const unsigned char *cur = ctxt->input->cur;
1152 unsigned char c;
1153
1154 c = *cur;
1155 if (c & 0x80) {
1156 if (cur[1] == 0)
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if ((cur[1] & 0xc0) != 0x80)
1159 goto encoding_error;
1160 if ((c & 0xe0) == 0xe0) {
1161 unsigned int val;
1162
1163 if (cur[2] == 0)
1164 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1165 if ((cur[2] & 0xc0) != 0x80)
1166 goto encoding_error;
1167 if ((c & 0xf0) == 0xf0) {
1168 if (cur[3] == 0)
1169 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1170 if (((c & 0xf8) != 0xf0) ||
1171 ((cur[3] & 0xc0) != 0x80))
1172 goto encoding_error;
1173 /* 4-byte code */
1174 ctxt->input->cur += 4;
1175 val = (cur[0] & 0x7) << 18;
1176 val |= (cur[1] & 0x3f) << 12;
1177 val |= (cur[2] & 0x3f) << 6;
1178 val |= cur[3] & 0x3f;
1179 } else {
1180 /* 3-byte code */
1181 ctxt->input->cur += 3;
1182 val = (cur[0] & 0xf) << 12;
1183 val |= (cur[1] & 0x3f) << 6;
1184 val |= cur[2] & 0x3f;
1185 }
1186 if (((val > 0xd7ff) && (val < 0xe000)) ||
1187 ((val > 0xfffd) && (val < 0x10000)) ||
1188 (val >= 0x110000)) {
1189 if ((ctxt->sax != NULL) &&
1190 (ctxt->sax->error != NULL))
1191 ctxt->sax->error(ctxt->userData,
1192 "Char 0x%X out of allowed range\n", val);
1193 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1194 ctxt->wellFormed = 0;
1195 ctxt->disableSAX = 1;
1196 }
1197 } else
1198 /* 2-byte code */
1199 ctxt->input->cur += 2;
1200 } else
1201 /* 1-byte code */
1202 ctxt->input->cur++;
1203 } else {
1204 /*
1205 * Assume it's a fixed lenght encoding (1) with
1206 * a compatibke encoding for the ASCII set, since
1207 * XML constructs only use < 128 chars
1208 */
1209 ctxt->input->cur++;
1210 }
1211 ctxt->nbChars++;
1212 if (*ctxt->input->cur == 0)
1213 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1214 }
1215 } else {
1216 ctxt->input->cur++;
1217 ctxt->nbChars++;
1218 if (*ctxt->input->cur == 0)
1219 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1220 }
1221 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1222 xmlParserHandlePEReference(ctxt);
1223 if ((*ctxt->input->cur == 0) &&
1224 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1225 xmlPopInput(ctxt);
1226 return;
1227encoding_error:
1228 /*
1229 * If we detect an UTF8 error that probably mean that the
1230 * input encoding didn't get properly advertized in the
1231 * declaration header. Report the error and switch the encoding
1232 * to ISO-Latin-1 (if you don't like this policy, just declare the
1233 * encoding !)
1234 */
1235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1236 ctxt->sax->error(ctxt->userData,
1237 "Input is not proper UTF-8, indicate encoding !\n");
1238 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1239 ctxt->input->cur[0], ctxt->input->cur[1],
1240 ctxt->input->cur[2], ctxt->input->cur[3]);
1241 }
1242 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1243
1244 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1245 ctxt->input->cur++;
1246 return;
1247}
1248
1249/**
1250 * xmlCurrentChar:
1251 * @ctxt: the XML parser context
1252 * @len: pointer to the length of the char read
1253 *
1254 * The current char value, if using UTF-8 this may actaully span multiple
1255 * bytes in the input buffer. Implement the end of line normalization:
1256 * 2.11 End-of-Line Handling
1257 * Wherever an external parsed entity or the literal entity value
1258 * of an internal parsed entity contains either the literal two-character
1259 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1260 * must pass to the application the single character #xA.
1261 * This behavior can conveniently be produced by normalizing all
1262 * line breaks to #xA on input, before parsing.)
1263 *
1264 * Returns the current char value and its lenght
1265 */
1266
1267int
1268xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1269 if (ctxt->instate == XML_PARSER_EOF)
1270 return(0);
1271
1272 if (ctxt->token != 0) {
1273 *len = 0;
1274 return(ctxt->token);
1275 }
1276 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1277 *len = 1;
1278 return((int) *ctxt->input->cur);
1279 }
1280 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1281 /*
1282 * We are supposed to handle UTF8, check it's valid
1283 * From rfc2044: encoding of the Unicode values on UTF-8:
1284 *
1285 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1286 * 0000 0000-0000 007F 0xxxxxxx
1287 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1288 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1289 *
1290 * Check for the 0x110000 limit too
1291 */
1292 const unsigned char *cur = ctxt->input->cur;
1293 unsigned char c;
1294 unsigned int val;
1295
1296 c = *cur;
1297 if (c & 0x80) {
1298 if (cur[1] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if ((cur[1] & 0xc0) != 0x80)
1301 goto encoding_error;
1302 if ((c & 0xe0) == 0xe0) {
1303
1304 if (cur[2] == 0)
1305 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1306 if ((cur[2] & 0xc0) != 0x80)
1307 goto encoding_error;
1308 if ((c & 0xf0) == 0xf0) {
1309 if (cur[3] == 0)
1310 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1311 if (((c & 0xf8) != 0xf0) ||
1312 ((cur[3] & 0xc0) != 0x80))
1313 goto encoding_error;
1314 /* 4-byte code */
1315 *len = 4;
1316 val = (cur[0] & 0x7) << 18;
1317 val |= (cur[1] & 0x3f) << 12;
1318 val |= (cur[2] & 0x3f) << 6;
1319 val |= cur[3] & 0x3f;
1320 } else {
1321 /* 3-byte code */
1322 *len = 3;
1323 val = (cur[0] & 0xf) << 12;
1324 val |= (cur[1] & 0x3f) << 6;
1325 val |= cur[2] & 0x3f;
1326 }
1327 } else {
1328 /* 2-byte code */
1329 *len = 2;
1330 val = (cur[0] & 0x1f) << 6;
1331 val |= cur[1] & 0x3f;
1332 }
1333 if (!IS_CHAR(val)) {
1334 if ((ctxt->sax != NULL) &&
1335 (ctxt->sax->error != NULL))
1336 ctxt->sax->error(ctxt->userData,
1337 "Char 0x%X out of allowed range\n", val);
1338 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1339 ctxt->wellFormed = 0;
1340 ctxt->disableSAX = 1;
1341 }
1342 return(val);
1343 } else {
1344 /* 1-byte code */
1345 *len = 1;
1346 if (*ctxt->input->cur == 0xD) {
1347 if (ctxt->input->cur[1] == 0xA) {
1348 ctxt->nbChars++;
1349 ctxt->input->cur++;
1350 }
1351 return(0xA);
1352 }
1353 return((int) *ctxt->input->cur);
1354 }
1355 }
1356 /*
1357 * Assume it's a fixed lenght encoding (1) with
1358 * a compatibke encoding for the ASCII set, since
1359 * XML constructs only use < 128 chars
1360 */
1361 *len = 1;
1362 if (*ctxt->input->cur == 0xD) {
1363 if (ctxt->input->cur[1] == 0xA) {
1364 ctxt->nbChars++;
1365 ctxt->input->cur++;
1366 }
1367 return(0xA);
1368 }
1369 return((int) *ctxt->input->cur);
1370encoding_error:
1371 /*
1372 * If we detect an UTF8 error that probably mean that the
1373 * input encoding didn't get properly advertized in the
1374 * declaration header. Report the error and switch the encoding
1375 * to ISO-Latin-1 (if you don't like this policy, just declare the
1376 * encoding !)
1377 */
1378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1379 ctxt->sax->error(ctxt->userData,
1380 "Input is not proper UTF-8, indicate encoding !\n");
1381 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1382 ctxt->input->cur[0], ctxt->input->cur[1],
1383 ctxt->input->cur[2], ctxt->input->cur[3]);
1384 }
1385 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1386
1387 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1388 *len = 1;
1389 return((int) *ctxt->input->cur);
1390}
1391
1392/**
1393 * xmlStringCurrentChar:
1394 * @ctxt: the XML parser context
1395 * @cur: pointer to the beginning of the char
1396 * @len: pointer to the length of the char read
1397 *
1398 * The current char value, if using UTF-8 this may actaully span multiple
1399 * bytes in the input buffer.
1400 *
1401 * Returns the current char value and its lenght
1402 */
1403
1404int
1405xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001406 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001407 /*
1408 * We are supposed to handle UTF8, check it's valid
1409 * From rfc2044: encoding of the Unicode values on UTF-8:
1410 *
1411 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1412 * 0000 0000-0000 007F 0xxxxxxx
1413 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1414 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1415 *
1416 * Check for the 0x110000 limit too
1417 */
1418 unsigned char c;
1419 unsigned int val;
1420
1421 c = *cur;
1422 if (c & 0x80) {
1423 if ((cur[1] & 0xc0) != 0x80)
1424 goto encoding_error;
1425 if ((c & 0xe0) == 0xe0) {
1426
1427 if ((cur[2] & 0xc0) != 0x80)
1428 goto encoding_error;
1429 if ((c & 0xf0) == 0xf0) {
1430 if (((c & 0xf8) != 0xf0) ||
1431 ((cur[3] & 0xc0) != 0x80))
1432 goto encoding_error;
1433 /* 4-byte code */
1434 *len = 4;
1435 val = (cur[0] & 0x7) << 18;
1436 val |= (cur[1] & 0x3f) << 12;
1437 val |= (cur[2] & 0x3f) << 6;
1438 val |= cur[3] & 0x3f;
1439 } else {
1440 /* 3-byte code */
1441 *len = 3;
1442 val = (cur[0] & 0xf) << 12;
1443 val |= (cur[1] & 0x3f) << 6;
1444 val |= cur[2] & 0x3f;
1445 }
1446 } else {
1447 /* 2-byte code */
1448 *len = 2;
1449 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001450 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001451 }
1452 if (!IS_CHAR(val)) {
1453 if ((ctxt->sax != NULL) &&
1454 (ctxt->sax->error != NULL))
1455 ctxt->sax->error(ctxt->userData,
1456 "Char 0x%X out of allowed range\n", val);
1457 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1458 ctxt->wellFormed = 0;
1459 ctxt->disableSAX = 1;
1460 }
1461 return(val);
1462 } else {
1463 /* 1-byte code */
1464 *len = 1;
1465 return((int) *cur);
1466 }
1467 }
1468 /*
1469 * Assume it's a fixed lenght encoding (1) with
1470 * a compatibke encoding for the ASCII set, since
1471 * XML constructs only use < 128 chars
1472 */
1473 *len = 1;
1474 return((int) *cur);
1475encoding_error:
1476 /*
1477 * If we detect an UTF8 error that probably mean that the
1478 * input encoding didn't get properly advertized in the
1479 * declaration header. Report the error and switch the encoding
1480 * to ISO-Latin-1 (if you don't like this policy, just declare the
1481 * encoding !)
1482 */
1483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1484 ctxt->sax->error(ctxt->userData,
1485 "Input is not proper UTF-8, indicate encoding !\n");
1486 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1487 ctxt->input->cur[0], ctxt->input->cur[1],
1488 ctxt->input->cur[2], ctxt->input->cur[3]);
1489 }
1490 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1491
1492 *len = 1;
1493 return((int) *cur);
1494}
1495
1496/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001497 * xmlCopyCharMultiByte:
1498 * @out: pointer to an arry of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001499 * @val: the char value
1500 *
1501 * append the char value in the array
1502 *
1503 * Returns the number of xmlChar written
1504 */
Owen Taylor3473f882001-02-23 17:55:21 +00001505int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001506xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001507 /*
1508 * We are supposed to handle UTF8, check it's valid
1509 * From rfc2044: encoding of the Unicode values on UTF-8:
1510 *
1511 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1512 * 0000 0000-0000 007F 0xxxxxxx
1513 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1514 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1515 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001516 if (val >= 0x80) {
1517 xmlChar *savedout = out;
1518 int bits;
1519 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1520 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1521 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1522 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001523 xmlGenericError(xmlGenericErrorContext,
1524 "Internal error, xmlCopyChar 0x%X out of bound\n",
1525 val);
1526 return(0);
1527 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001528 for ( ; bits >= 0; bits-= 6)
1529 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1530 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 }
1532 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001533 return 1;
1534}
1535
1536/**
1537 * xmlCopyChar:
1538 * @len: Ignored, compatibility
1539 * @out: pointer to an arry of xmlChar
1540 * @val: the char value
1541 *
1542 * append the char value in the array
1543 *
1544 * Returns the number of xmlChar written
1545 */
1546
1547int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001548xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001549 /* the len parameter is ignored */
1550 if (val >= 0x80) {
1551 return(xmlCopyCharMultiByte (out, val));
1552 }
1553 *out = (xmlChar) val;
1554 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001555}
1556
1557/************************************************************************
1558 * *
1559 * Commodity functions to switch encodings *
1560 * *
1561 ************************************************************************/
1562
1563/**
1564 * xmlSwitchEncoding:
1565 * @ctxt: the parser context
1566 * @enc: the encoding value (number)
1567 *
1568 * change the input functions when discovering the character encoding
1569 * of a given entity.
1570 *
1571 * Returns 0 in case of success, -1 otherwise
1572 */
1573int
1574xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1575{
1576 xmlCharEncodingHandlerPtr handler;
1577
1578 switch (enc) {
1579 case XML_CHAR_ENCODING_ERROR:
1580 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1582 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1583 ctxt->wellFormed = 0;
1584 ctxt->disableSAX = 1;
1585 break;
1586 case XML_CHAR_ENCODING_NONE:
1587 /* let's assume it's UTF-8 without the XML decl */
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1589 return(0);
1590 case XML_CHAR_ENCODING_UTF8:
1591 /* default encoding, no conversion should be needed */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001593
1594 /*
1595 * Errata on XML-1.0 June 20 2001
1596 * Specific handling of the Byte Order Mark for
1597 * UTF-8
1598 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001599 if ((ctxt->input != NULL) &&
1600 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001601 (ctxt->input->cur[1] == 0xBB) &&
1602 (ctxt->input->cur[2] == 0xBF)) {
1603 ctxt->input->cur += 3;
1604 }
Owen Taylor3473f882001-02-23 17:55:21 +00001605 return(0);
1606 default:
1607 break;
1608 }
1609 handler = xmlGetCharEncodingHandler(enc);
1610 if (handler == NULL) {
1611 /*
1612 * Default handlers.
1613 */
1614 switch (enc) {
1615 case XML_CHAR_ENCODING_ERROR:
1616 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1618 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1619 ctxt->wellFormed = 0;
1620 ctxt->disableSAX = 1;
1621 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1622 break;
1623 case XML_CHAR_ENCODING_NONE:
1624 /* let's assume it's UTF-8 without the XML decl */
1625 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1626 return(0);
1627 case XML_CHAR_ENCODING_UTF8:
1628 case XML_CHAR_ENCODING_ASCII:
1629 /* default encoding, no conversion should be needed */
1630 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1631 return(0);
1632 case XML_CHAR_ENCODING_UTF16LE:
1633 break;
1634 case XML_CHAR_ENCODING_UTF16BE:
1635 break;
1636 case XML_CHAR_ENCODING_UCS4LE:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding USC4 little endian not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_UCS4BE:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding USC4 big endian not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_EBCDIC:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding EBCDIC not supported\n");
1653 break;
1654 case XML_CHAR_ENCODING_UCS4_2143:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding UCS4 2143 not supported\n");
1659 break;
1660 case XML_CHAR_ENCODING_UCS4_3412:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding UCS4 3412 not supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_UCS2:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding UCS2 not supported\n");
1671 break;
1672 case XML_CHAR_ENCODING_8859_1:
1673 case XML_CHAR_ENCODING_8859_2:
1674 case XML_CHAR_ENCODING_8859_3:
1675 case XML_CHAR_ENCODING_8859_4:
1676 case XML_CHAR_ENCODING_8859_5:
1677 case XML_CHAR_ENCODING_8859_6:
1678 case XML_CHAR_ENCODING_8859_7:
1679 case XML_CHAR_ENCODING_8859_8:
1680 case XML_CHAR_ENCODING_8859_9:
1681 /*
1682 * We used to keep the internal content in the
1683 * document encoding however this turns being unmaintainable
1684 * So xmlGetCharEncodingHandler() will return non-null
1685 * values for this now.
1686 */
1687 if ((ctxt->inputNr == 1) &&
1688 (ctxt->encoding == NULL) &&
1689 (ctxt->input->encoding != NULL)) {
1690 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1691 }
1692 ctxt->charset = enc;
1693 return(0);
1694 case XML_CHAR_ENCODING_2022_JP:
1695 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697 ctxt->sax->error(ctxt->userData,
1698 "char encoding ISO-2022-JPnot supported\n");
1699 break;
1700 case XML_CHAR_ENCODING_SHIFT_JIS:
1701 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1703 ctxt->sax->error(ctxt->userData,
1704 "char encoding Shift_JIS not supported\n");
1705 break;
1706 case XML_CHAR_ENCODING_EUC_JP:
1707 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
1710 "char encoding EUC-JPnot supported\n");
1711 break;
1712 }
1713 }
1714 if (handler == NULL)
1715 return(-1);
1716 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1717 return(xmlSwitchToEncoding(ctxt, handler));
1718}
1719
1720/**
1721 * xmlSwitchToEncoding:
1722 * @ctxt: the parser context
1723 * @handler: the encoding handler
1724 *
1725 * change the input functions when discovering the character encoding
1726 * of a given entity.
1727 *
1728 * Returns 0 in case of success, -1 otherwise
1729 */
1730int
1731xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1732{
1733 int nbchars;
1734
1735 if (handler != NULL) {
1736 if (ctxt->input != NULL) {
1737 if (ctxt->input->buf != NULL) {
1738 if (ctxt->input->buf->encoder != NULL) {
1739 if (ctxt->input->buf->encoder == handler)
1740 return(0);
1741 /*
1742 * Note: this is a bit dangerous, but that's what it
1743 * takes to use nearly compatible signature for different
1744 * encodings.
1745 */
1746 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1747 ctxt->input->buf->encoder = handler;
1748 return(0);
1749 }
1750 ctxt->input->buf->encoder = handler;
1751
1752 /*
1753 * Is there already some content down the pipe to convert ?
1754 */
1755 if ((ctxt->input->buf->buffer != NULL) &&
1756 (ctxt->input->buf->buffer->use > 0)) {
1757 int processed;
1758
1759 /*
1760 * Specific handling of the Byte Order Mark for
1761 * UTF-16
1762 */
1763 if ((handler->name != NULL) &&
1764 (!strcmp(handler->name, "UTF-16LE")) &&
1765 (ctxt->input->cur[0] == 0xFF) &&
1766 (ctxt->input->cur[1] == 0xFE)) {
1767 ctxt->input->cur += 2;
1768 }
1769 if ((handler->name != NULL) &&
1770 (!strcmp(handler->name, "UTF-16BE")) &&
1771 (ctxt->input->cur[0] == 0xFE) &&
1772 (ctxt->input->cur[1] == 0xFF)) {
1773 ctxt->input->cur += 2;
1774 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001775 /*
1776 * Errata on XML-1.0 June 20 2001
1777 * Specific handling of the Byte Order Mark for
1778 * UTF-8
1779 */
1780 if ((handler->name != NULL) &&
1781 (!strcmp(handler->name, "UTF-8")) &&
1782 (ctxt->input->cur[0] == 0xEF) &&
1783 (ctxt->input->cur[1] == 0xBB) &&
1784 (ctxt->input->cur[1] == 0xBF)) {
1785 ctxt->input->cur += 3;
1786 }
Owen Taylor3473f882001-02-23 17:55:21 +00001787
1788 /*
1789 * Shring the current input buffer.
1790 * Move it as the raw buffer and create a new input buffer
1791 */
1792 processed = ctxt->input->cur - ctxt->input->base;
1793 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1794 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1795 ctxt->input->buf->buffer = xmlBufferCreate();
1796
1797 if (ctxt->html) {
1798 /*
1799 * converst as much as possbile of the buffer
1800 */
1801 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1802 ctxt->input->buf->buffer,
1803 ctxt->input->buf->raw);
1804 } else {
1805 /*
1806 * convert just enough to get
1807 * '<?xml version="1.0" encoding="xxx"?>'
1808 * parsed with the autodetected encoding
1809 * into the parser reading buffer.
1810 */
1811 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1812 ctxt->input->buf->buffer,
1813 ctxt->input->buf->raw);
1814 }
1815 if (nbchars < 0) {
1816 xmlGenericError(xmlGenericErrorContext,
1817 "xmlSwitchToEncoding: encoder error\n");
1818 return(-1);
1819 }
1820 ctxt->input->base =
1821 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001822 ctxt->input->end =
1823 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001824
1825 }
1826 return(0);
1827 } else {
1828 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1829 /*
1830 * When parsing a static memory array one must know the
1831 * size to be able to convert the buffer.
1832 */
1833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1834 ctxt->sax->error(ctxt->userData,
1835 "xmlSwitchEncoding : no input\n");
1836 return(-1);
1837 } else {
1838 int processed;
1839
1840 /*
1841 * Shring the current input buffer.
1842 * Move it as the raw buffer and create a new input buffer
1843 */
1844 processed = ctxt->input->cur - ctxt->input->base;
1845
1846 ctxt->input->buf->raw = xmlBufferCreate();
1847 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1848 ctxt->input->length - processed);
1849 ctxt->input->buf->buffer = xmlBufferCreate();
1850
1851 /*
1852 * convert as much as possible of the raw input
1853 * to the parser reading buffer.
1854 */
1855 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1856 ctxt->input->buf->buffer,
1857 ctxt->input->buf->raw);
1858 if (nbchars < 0) {
1859 xmlGenericError(xmlGenericErrorContext,
1860 "xmlSwitchToEncoding: encoder error\n");
1861 return(-1);
1862 }
1863
1864 /*
1865 * Conversion succeeded, get rid of the old buffer
1866 */
1867 if ((ctxt->input->free != NULL) &&
1868 (ctxt->input->base != NULL))
1869 ctxt->input->free((xmlChar *) ctxt->input->base);
1870 ctxt->input->base =
1871 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001872 ctxt->input->end =
1873 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001874 }
1875 }
1876 } else {
1877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1878 ctxt->sax->error(ctxt->userData,
1879 "xmlSwitchEncoding : no input\n");
1880 return(-1);
1881 }
1882 /*
1883 * The parsing is now done in UTF8 natively
1884 */
1885 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1886 } else
1887 return(-1);
1888 return(0);
1889
1890}
1891
1892/************************************************************************
1893 * *
1894 * Commodity functions to handle entities processing *
1895 * *
1896 ************************************************************************/
1897
1898/**
1899 * xmlFreeInputStream:
1900 * @input: an xmlParserInputPtr
1901 *
1902 * Free up an input stream.
1903 */
1904void
1905xmlFreeInputStream(xmlParserInputPtr input) {
1906 if (input == NULL) return;
1907
1908 if (input->filename != NULL) xmlFree((char *) input->filename);
1909 if (input->directory != NULL) xmlFree((char *) input->directory);
1910 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1911 if (input->version != NULL) xmlFree((char *) input->version);
1912 if ((input->free != NULL) && (input->base != NULL))
1913 input->free((xmlChar *) input->base);
1914 if (input->buf != NULL)
1915 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001916 xmlFree(input);
1917}
1918
1919/**
1920 * xmlNewInputStream:
1921 * @ctxt: an XML parser context
1922 *
1923 * Create a new input stream structure
1924 * Returns the new input stream or NULL
1925 */
1926xmlParserInputPtr
1927xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1928 xmlParserInputPtr input;
1929
1930 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1931 if (input == NULL) {
1932 if (ctxt != NULL) {
1933 ctxt->errNo = XML_ERR_NO_MEMORY;
1934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1935 ctxt->sax->error(ctxt->userData,
1936 "malloc: couldn't allocate a new input stream\n");
1937 ctxt->errNo = XML_ERR_NO_MEMORY;
1938 }
1939 return(NULL);
1940 }
1941 memset(input, 0, sizeof(xmlParserInput));
1942 input->line = 1;
1943 input->col = 1;
1944 input->standalone = -1;
1945 return(input);
1946}
1947
1948/**
1949 * xmlNewIOInputStream:
1950 * @ctxt: an XML parser context
1951 * @input: an I/O Input
1952 * @enc: the charset encoding if known
1953 *
1954 * Create a new input stream structure encapsulating the @input into
1955 * a stream suitable for the parser.
1956 *
1957 * Returns the new input stream or NULL
1958 */
1959xmlParserInputPtr
1960xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1961 xmlCharEncoding enc) {
1962 xmlParserInputPtr inputStream;
1963
1964 if (xmlParserDebugEntities)
1965 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1966 inputStream = xmlNewInputStream(ctxt);
1967 if (inputStream == NULL) {
1968 return(NULL);
1969 }
1970 inputStream->filename = NULL;
1971 inputStream->buf = input;
1972 inputStream->base = inputStream->buf->buffer->content;
1973 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001974 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001975 if (enc != XML_CHAR_ENCODING_NONE) {
1976 xmlSwitchEncoding(ctxt, enc);
1977 }
1978
1979 return(inputStream);
1980}
1981
1982/**
1983 * xmlNewEntityInputStream:
1984 * @ctxt: an XML parser context
1985 * @entity: an Entity pointer
1986 *
1987 * Create a new input stream based on an xmlEntityPtr
1988 *
1989 * Returns the new input stream or NULL
1990 */
1991xmlParserInputPtr
1992xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1993 xmlParserInputPtr input;
1994
1995 if (entity == NULL) {
1996 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1998 ctxt->sax->error(ctxt->userData,
1999 "internal: xmlNewEntityInputStream entity = NULL\n");
2000 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2001 return(NULL);
2002 }
2003 if (xmlParserDebugEntities)
2004 xmlGenericError(xmlGenericErrorContext,
2005 "new input from entity: %s\n", entity->name);
2006 if (entity->content == NULL) {
2007 switch (entity->etype) {
2008 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2009 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2011 ctxt->sax->error(ctxt->userData,
2012 "xmlNewEntityInputStream unparsed entity !\n");
2013 break;
2014 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2015 case XML_EXTERNAL_PARAMETER_ENTITY:
2016 return(xmlLoadExternalEntity((char *) entity->URI,
2017 (char *) entity->ExternalID, ctxt));
2018 case XML_INTERNAL_GENERAL_ENTITY:
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "Internal entity %s without content !\n", entity->name);
2022 break;
2023 case XML_INTERNAL_PARAMETER_ENTITY:
2024 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2026 ctxt->sax->error(ctxt->userData,
2027 "Internal parameter entity %s without content !\n", entity->name);
2028 break;
2029 case XML_INTERNAL_PREDEFINED_ENTITY:
2030 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032 ctxt->sax->error(ctxt->userData,
2033 "Predefined entity %s without content !\n", entity->name);
2034 break;
2035 }
2036 return(NULL);
2037 }
2038 input = xmlNewInputStream(ctxt);
2039 if (input == NULL) {
2040 return(NULL);
2041 }
2042 input->filename = (char *) entity->URI;
2043 input->base = entity->content;
2044 input->cur = entity->content;
2045 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002046 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002047 return(input);
2048}
2049
2050/**
2051 * xmlNewStringInputStream:
2052 * @ctxt: an XML parser context
2053 * @buffer: an memory buffer
2054 *
2055 * Create a new input stream based on a memory buffer.
2056 * Returns the new input stream
2057 */
2058xmlParserInputPtr
2059xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2060 xmlParserInputPtr input;
2061
2062 if (buffer == NULL) {
2063 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2065 ctxt->sax->error(ctxt->userData,
2066 "internal: xmlNewStringInputStream string = NULL\n");
2067 return(NULL);
2068 }
2069 if (xmlParserDebugEntities)
2070 xmlGenericError(xmlGenericErrorContext,
2071 "new fixed input: %.30s\n", buffer);
2072 input = xmlNewInputStream(ctxt);
2073 if (input == NULL) {
2074 return(NULL);
2075 }
2076 input->base = buffer;
2077 input->cur = buffer;
2078 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002079 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002080 return(input);
2081}
2082
2083/**
2084 * xmlNewInputFromFile:
2085 * @ctxt: an XML parser context
2086 * @filename: the filename to use as entity
2087 *
2088 * Create a new input stream based on a file.
2089 *
2090 * Returns the new input stream or NULL in case of error
2091 */
2092xmlParserInputPtr
2093xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2094 xmlParserInputBufferPtr buf;
2095 xmlParserInputPtr inputStream;
2096 char *directory = NULL;
2097 xmlChar *URI = NULL;
2098
2099 if (xmlParserDebugEntities)
2100 xmlGenericError(xmlGenericErrorContext,
2101 "new input from file: %s\n", filename);
2102 if (ctxt == NULL) return(NULL);
2103 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2104 if (buf == NULL)
2105 return(NULL);
2106
2107 URI = xmlStrdup((xmlChar *) filename);
2108 directory = xmlParserGetDirectory((const char *) URI);
2109
2110 inputStream = xmlNewInputStream(ctxt);
2111 if (inputStream == NULL) {
2112 if (directory != NULL) xmlFree((char *) directory);
2113 if (URI != NULL) xmlFree((char *) URI);
2114 return(NULL);
2115 }
2116
2117 inputStream->filename = (const char *) URI;
2118 inputStream->directory = directory;
2119 inputStream->buf = buf;
2120
2121 inputStream->base = inputStream->buf->buffer->content;
2122 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002123 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002124 if ((ctxt->directory == NULL) && (directory != NULL))
2125 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2126 return(inputStream);
2127}
2128
2129/************************************************************************
2130 * *
2131 * Commodity functions to handle parser contexts *
2132 * *
2133 ************************************************************************/
2134
2135/**
2136 * xmlInitParserCtxt:
2137 * @ctxt: an XML parser context
2138 *
2139 * Initialize a parser context
2140 */
2141
2142void
2143xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2144{
2145 xmlSAXHandler *sax;
2146
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002147 if(ctxt==NULL) {
2148 xmlGenericError(xmlGenericErrorContext,
2149 "xmlInitParserCtxt: NULL context given\n");
2150 return;
2151 }
2152
Owen Taylor3473f882001-02-23 17:55:21 +00002153 xmlDefaultSAXHandlerInit();
2154
2155 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2156 if (sax == NULL) {
2157 xmlGenericError(xmlGenericErrorContext,
2158 "xmlInitParserCtxt: out of memory\n");
2159 }
2160 else
2161 memset(sax, 0, sizeof(xmlSAXHandler));
2162
2163 /* Allocate the Input stack */
2164 ctxt->inputTab = (xmlParserInputPtr *)
2165 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2166 if (ctxt->inputTab == NULL) {
2167 xmlGenericError(xmlGenericErrorContext,
2168 "xmlInitParserCtxt: out of memory\n");
2169 ctxt->inputNr = 0;
2170 ctxt->inputMax = 0;
2171 ctxt->input = NULL;
2172 return;
2173 }
2174 ctxt->inputNr = 0;
2175 ctxt->inputMax = 5;
2176 ctxt->input = NULL;
2177
2178 ctxt->version = NULL;
2179 ctxt->encoding = NULL;
2180 ctxt->standalone = -1;
2181 ctxt->hasExternalSubset = 0;
2182 ctxt->hasPErefs = 0;
2183 ctxt->html = 0;
2184 ctxt->external = 0;
2185 ctxt->instate = XML_PARSER_START;
2186 ctxt->token = 0;
2187 ctxt->directory = NULL;
2188
2189 /* Allocate the Node stack */
2190 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2191 if (ctxt->nodeTab == NULL) {
2192 xmlGenericError(xmlGenericErrorContext,
2193 "xmlInitParserCtxt: out of memory\n");
2194 ctxt->nodeNr = 0;
2195 ctxt->nodeMax = 0;
2196 ctxt->node = NULL;
2197 ctxt->inputNr = 0;
2198 ctxt->inputMax = 0;
2199 ctxt->input = NULL;
2200 return;
2201 }
2202 ctxt->nodeNr = 0;
2203 ctxt->nodeMax = 10;
2204 ctxt->node = NULL;
2205
2206 /* Allocate the Name stack */
2207 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2208 if (ctxt->nameTab == NULL) {
2209 xmlGenericError(xmlGenericErrorContext,
2210 "xmlInitParserCtxt: out of memory\n");
2211 ctxt->nodeNr = 0;
2212 ctxt->nodeMax = 0;
2213 ctxt->node = NULL;
2214 ctxt->inputNr = 0;
2215 ctxt->inputMax = 0;
2216 ctxt->input = NULL;
2217 ctxt->nameNr = 0;
2218 ctxt->nameMax = 0;
2219 ctxt->name = NULL;
2220 return;
2221 }
2222 ctxt->nameNr = 0;
2223 ctxt->nameMax = 10;
2224 ctxt->name = NULL;
2225
2226 /* Allocate the space stack */
2227 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2228 if (ctxt->spaceTab == NULL) {
2229 xmlGenericError(xmlGenericErrorContext,
2230 "xmlInitParserCtxt: out of memory\n");
2231 ctxt->nodeNr = 0;
2232 ctxt->nodeMax = 0;
2233 ctxt->node = NULL;
2234 ctxt->inputNr = 0;
2235 ctxt->inputMax = 0;
2236 ctxt->input = NULL;
2237 ctxt->nameNr = 0;
2238 ctxt->nameMax = 0;
2239 ctxt->name = NULL;
2240 ctxt->spaceNr = 0;
2241 ctxt->spaceMax = 0;
2242 ctxt->space = NULL;
2243 return;
2244 }
2245 ctxt->spaceNr = 1;
2246 ctxt->spaceMax = 10;
2247 ctxt->spaceTab[0] = -1;
2248 ctxt->space = &ctxt->spaceTab[0];
2249
Daniel Veillard14be0a12001-03-03 18:50:55 +00002250 ctxt->sax = sax;
2251 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 ctxt->userData = ctxt;
2254 ctxt->myDoc = NULL;
2255 ctxt->wellFormed = 1;
2256 ctxt->valid = 1;
2257 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2258 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2259 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002260 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002261 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2262 ctxt->vctxt.userData = ctxt;
2263 if (ctxt->validate) {
2264 ctxt->vctxt.error = xmlParserValidityError;
2265 if (xmlGetWarningsDefaultValue == 0)
2266 ctxt->vctxt.warning = NULL;
2267 else
2268 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002269 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002270 } else {
2271 ctxt->vctxt.error = NULL;
2272 ctxt->vctxt.warning = NULL;
2273 }
2274 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2275 ctxt->record_info = 0;
2276 ctxt->nbChars = 0;
2277 ctxt->checkIndex = 0;
2278 ctxt->inSubset = 0;
2279 ctxt->errNo = XML_ERR_OK;
2280 ctxt->depth = 0;
2281 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002282 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002283 xmlInitNodeInfoSeq(&ctxt->node_seq);
2284}
2285
2286/**
2287 * xmlFreeParserCtxt:
2288 * @ctxt: an XML parser context
2289 *
2290 * Free all the memory used by a parser context. However the parsed
2291 * document in ctxt->myDoc is not freed.
2292 */
2293
2294void
2295xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2296{
2297 xmlParserInputPtr input;
2298 xmlChar *oldname;
2299
2300 if (ctxt == NULL) return;
2301
2302 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2303 xmlFreeInputStream(input);
2304 }
2305 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2306 xmlFree(oldname);
2307 }
2308 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2309 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2310 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2311 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2312 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2313 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2314 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2315 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2316 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002317 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2318 xmlFree(ctxt->sax);
2319 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002320 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002321#ifdef LIBXML_CATALOG_ENABLED
2322 if (ctxt->catalogs != NULL)
2323 xmlCatalogFreeLocal(ctxt->catalogs);
2324#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlFree(ctxt);
2326}
2327
2328/**
2329 * xmlNewParserCtxt:
2330 *
2331 * Allocate and initialize a new parser context.
2332 *
2333 * Returns the xmlParserCtxtPtr or NULL
2334 */
2335
2336xmlParserCtxtPtr
2337xmlNewParserCtxt()
2338{
2339 xmlParserCtxtPtr ctxt;
2340
2341 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2342 if (ctxt == NULL) {
2343 xmlGenericError(xmlGenericErrorContext,
2344 "xmlNewParserCtxt : cannot allocate context\n");
2345 perror("malloc");
2346 return(NULL);
2347 }
2348 memset(ctxt, 0, sizeof(xmlParserCtxt));
2349 xmlInitParserCtxt(ctxt);
2350 return(ctxt);
2351}
2352
2353/************************************************************************
2354 * *
 *		Handling of node information				*
2356 * *
2357 ************************************************************************/
2358
2359/**
2360 * xmlClearParserCtxt:
2361 * @ctxt: an XML parser context
2362 *
2363 * Clear (release owned resources) and reinitialize a parser context
2364 */
2365
2366void
2367xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2368{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002369 if (ctxt==NULL)
2370 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002371 xmlClearNodeInfoSeq(&ctxt->node_seq);
2372 xmlInitParserCtxt(ctxt);
2373}
2374
2375/**
2376 * xmlParserFindNodeInfo:
2377 * @ctxt: an XML parser context
2378 * @node: an XML node within the tree
2379 *
2380 * Find the parser node info struct for a given node
2381 *
2382 * Returns an xmlParserNodeInfo block pointer or NULL
2383 */
2384const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2385 const xmlNode* node)
2386{
2387 unsigned long pos;
2388
2389 /* Find position where node should be at */
2390 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2391 if ( ctx->node_seq.buffer[pos].node == node )
2392 return &ctx->node_seq.buffer[pos];
2393 else
2394 return NULL;
2395}
2396
2397
2398/**
2399 * xmlInitNodeInfoSeq:
2400 * @seq: a node info sequence pointer
2401 *
2402 * -- Initialize (set to initial state) node info sequence
2403 */
2404void
2405xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2406{
2407 seq->length = 0;
2408 seq->maximum = 0;
2409 seq->buffer = NULL;
2410}
2411
2412/**
2413 * xmlClearNodeInfoSeq:
2414 * @seq: a node info sequence pointer
2415 *
2416 * -- Clear (release memory and reinitialize) node
2417 * info sequence
2418 */
2419void
2420xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2421{
2422 if ( seq->buffer != NULL )
2423 xmlFree(seq->buffer);
2424 xmlInitNodeInfoSeq(seq);
2425}
2426
2427
2428/**
2429 * xmlParserFindNodeInfoIndex:
2430 * @seq: a node info sequence pointer
2431 * @node: an XML node pointer
2432 *
2433 *
2434 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2435 * the given node is or should be at in a sorted sequence
2436 *
2437 * Returns a long indicating the position of the record
2438 */
2439unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2440 const xmlNode* node)
2441{
2442 unsigned long upper, lower, middle;
2443 int found = 0;
2444
2445 /* Do a binary search for the key */
2446 lower = 1;
2447 upper = seq->length;
2448 middle = 0;
2449 while ( lower <= upper && !found) {
2450 middle = lower + (upper - lower) / 2;
2451 if ( node == seq->buffer[middle - 1].node )
2452 found = 1;
2453 else if ( node < seq->buffer[middle - 1].node )
2454 upper = middle - 1;
2455 else
2456 lower = middle + 1;
2457 }
2458
2459 /* Return position */
2460 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2461 return middle;
2462 else
2463 return middle - 1;
2464}
2465
2466
2467/**
2468 * xmlParserAddNodeInfo:
2469 * @ctxt: an XML parser context
2470 * @info: a node info sequence pointer
2471 *
2472 * Insert node info record into the sorted sequence
2473 */
2474void
2475xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2476 const xmlParserNodeInfo* info)
2477{
2478 unsigned long pos;
2479 static unsigned int block_size = 5;
2480
2481 /* Find pos and check to see if node is already in the sequence */
2482 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2483 if ( pos < ctxt->node_seq.length
2484 && ctxt->node_seq.buffer[pos].node == info->node ) {
2485 ctxt->node_seq.buffer[pos] = *info;
2486 }
2487
2488 /* Otherwise, we need to add new node to buffer */
2489 else {
2490 /* Expand buffer by 5 if needed */
2491 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2492 xmlParserNodeInfo* tmp_buffer;
2493 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2494 *(ctxt->node_seq.maximum + block_size));
2495
2496 if ( ctxt->node_seq.buffer == NULL )
2497 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2498 else
2499 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2500
2501 if ( tmp_buffer == NULL ) {
2502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2503 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2504 ctxt->errNo = XML_ERR_NO_MEMORY;
2505 return;
2506 }
2507 ctxt->node_seq.buffer = tmp_buffer;
2508 ctxt->node_seq.maximum += block_size;
2509 }
2510
2511 /* If position is not at end, move elements out of the way */
2512 if ( pos != ctxt->node_seq.length ) {
2513 unsigned long i;
2514
2515 for ( i = ctxt->node_seq.length; i > pos; i-- )
2516 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2517 }
2518
2519 /* Copy element and increase length */
2520 ctxt->node_seq.buffer[pos] = *info;
2521 ctxt->node_seq.length++;
2522 }
2523}
2524
2525/************************************************************************
2526 * *
 *		Default settings					*
2528 * *
2529 ************************************************************************/
2530/**
2531 * xmlPedanticParserDefault:
2532 * @val: int 0 or 1
2533 *
2534 * Set and return the previous value for enabling pedantic warnings.
2535 *
2536 * Returns the last value for 0 for no substitution, 1 for substitution.
2537 */
2538
2539int
2540xmlPedanticParserDefault(int val) {
2541 int old = xmlPedanticParserDefaultValue;
2542
2543 xmlPedanticParserDefaultValue = val;
2544 return(old);
2545}
2546
2547/**
2548 * xmlLineNumbersDefault:
2549 * @val: int 0 or 1
2550 *
2551 * Set and return the previous value for enabling line numbers in elements
2552 * contents. This may break on old application and is turned off by default.
2553 *
2554 * Returns the last value for 0 for no substitution, 1 for substitution.
2555 */
2556
2557int
2558xmlLineNumbersDefault(int val) {
2559 int old = xmlLineNumbersDefaultValue;
2560
2561 xmlLineNumbersDefaultValue = val;
2562 return(old);
2563}
2564
2565/**
2566 * xmlSubstituteEntitiesDefault:
2567 * @val: int 0 or 1
2568 *
2569 * Set and return the previous value for default entity support.
2570 * Initially the parser always keep entity references instead of substituting
2571 * entity values in the output. This function has to be used to change the
2572 * default parser behaviour
2573 * SAX::subtituteEntities() has to be used for changing that on a file by
2574 * file basis.
2575 *
2576 * Returns the last value for 0 for no substitution, 1 for substitution.
2577 */
2578
2579int
2580xmlSubstituteEntitiesDefault(int val) {
2581 int old = xmlSubstituteEntitiesDefaultValue;
2582
2583 xmlSubstituteEntitiesDefaultValue = val;
2584 return(old);
2585}
2586
2587/**
2588 * xmlKeepBlanksDefault:
2589 * @val: int 0 or 1
2590 *
2591 * Set and return the previous value for default blanks text nodes support.
2592 * The 1.x version of the parser used an heuristic to try to detect
2593 * ignorable white spaces. As a result the SAX callback was generating
2594 * ignorableWhitespace() callbacks instead of characters() one, and when
2595 * using the DOM output text nodes containing those blanks were not generated.
2596 * The 2.x and later version will switch to the XML standard way and
2597 * ignorableWhitespace() are only generated when running the parser in
2598 * validating mode and when the current element doesn't allow CDATA or
2599 * mixed content.
2600 * This function is provided as a way to force the standard behaviour
2601 * on 1.X libs and to switch back to the old mode for compatibility when
2602 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2603 * by using xmlIsBlankNode() commodity function to detect the "empty"
2604 * nodes generated.
2605 * This value also affect autogeneration of indentation when saving code
2606 * if blanks sections are kept, indentation is not generated.
2607 *
2608 * Returns the last value for 0 for no substitution, 1 for substitution.
2609 */
2610
2611int
2612xmlKeepBlanksDefault(int val) {
2613 int old = xmlKeepBlanksDefaultValue;
2614
2615 xmlKeepBlanksDefaultValue = val;
2616 xmlIndentTreeOutput = !val;
2617 return(old);
2618}
2619
2620/************************************************************************
2621 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002622 * Deprecated functions kept for compatibility *
2623 * *
2624 ************************************************************************/
2625
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002626/**
2627 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002628 * @lang: pointer to the string value
2629 *
2630 * Checks that the value conforms to the LanguageID production:
2631 *
2632 * NOTE: this is somewhat deprecated, those productions were removed from
2633 * the XML Second edition.
2634 *
2635 * [33] LanguageID ::= Langcode ('-' Subcode)*
2636 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2637 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2638 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2639 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2640 * [38] Subcode ::= ([a-z] | [A-Z])+
2641 *
2642 * Returns 1 if correct 0 otherwise
2643 **/
2644int
2645xmlCheckLanguageID(const xmlChar *lang) {
2646 const xmlChar *cur = lang;
2647
2648 if (cur == NULL)
2649 return(0);
2650 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2651 ((cur[0] == 'I') && (cur[1] == '-'))) {
2652 /*
2653 * IANA code
2654 */
2655 cur += 2;
2656 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2657 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2658 cur++;
2659 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2660 ((cur[0] == 'X') && (cur[1] == '-'))) {
2661 /*
2662 * User code
2663 */
2664 cur += 2;
2665 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2666 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2667 cur++;
2668 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2669 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2670 /*
2671 * ISO639
2672 */
2673 cur++;
2674 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2675 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2676 cur++;
2677 else
2678 return(0);
2679 } else
2680 return(0);
2681 while (cur[0] != 0) { /* non input consuming */
2682 if (cur[0] != '-')
2683 return(0);
2684 cur++;
2685 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2686 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2687 cur++;
2688 else
2689 return(0);
2690 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2691 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2692 cur++;
2693 }
2694 return(1);
2695}
2696
2697/**
2698 * xmlDecodeEntities:
2699 * @ctxt: the parser context
2700 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2701 * @len: the len to decode (in bytes !), -1 for no size limit
2702 * @end: an end marker xmlChar, 0 if none
2703 * @end2: an end marker xmlChar, 0 if none
2704 * @end3: an end marker xmlChar, 0 if none
2705 *
2706 * This function is deprecated, we now always process entities content
2707 * through xmlStringDecodeEntities
2708 *
2709 * TODO: remove it in next major release.
2710 *
2711 * [67] Reference ::= EntityRef | CharRef
2712 *
2713 * [69] PEReference ::= '%' Name ';'
2714 *
2715 * Returns A newly allocated string with the substitution done. The caller
2716 * must deallocate it !
2717 */
2718xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002719xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2720 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002721#if 0
2722 xmlChar *buffer = NULL;
2723 unsigned int buffer_size = 0;
2724 unsigned int nbchars = 0;
2725
2726 xmlChar *current = NULL;
2727 xmlEntityPtr ent;
2728 unsigned int max = (unsigned int) len;
2729 int c,l;
2730#endif
2731
2732 static int deprecated = 0;
2733 if (!deprecated) {
2734 xmlGenericError(xmlGenericErrorContext,
2735 "xmlDecodeEntities() deprecated function reached\n");
2736 deprecated = 1;
2737 }
2738
2739#if 0
2740 if (ctxt->depth > 40) {
2741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2742 ctxt->sax->error(ctxt->userData,
2743 "Detected entity reference loop\n");
2744 ctxt->wellFormed = 0;
2745 ctxt->disableSAX = 1;
2746 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2747 return(NULL);
2748 }
2749
2750 /*
2751 * allocate a translation buffer.
2752 */
2753 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2754 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2755 if (buffer == NULL) {
2756 perror("xmlDecodeEntities: malloc failed");
2757 return(NULL);
2758 }
2759
2760 /*
2761 * Ok loop until we reach one of the ending char or a size limit.
2762 */
2763 GROW;
2764 c = CUR_CHAR(l);
2765 while ((nbchars < max) && (c != end) && /* NOTUSED */
2766 (c != end2) && (c != end3)) {
2767 GROW;
2768 if (c == 0) break;
2769 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2770 int val = xmlParseCharRef(ctxt);
2771 COPY_BUF(0,buffer,nbchars,val);
2772 NEXTL(l);
2773 } else if ((c == '&') && (ctxt->token != '&') &&
2774 (what & XML_SUBSTITUTE_REF)) {
2775 if (xmlParserDebugEntities)
2776 xmlGenericError(xmlGenericErrorContext,
2777 "decoding Entity Reference\n");
2778 ent = xmlParseEntityRef(ctxt);
2779 if ((ent != NULL) &&
2780 (ctxt->replaceEntities != 0)) {
2781 current = ent->content;
2782 while (*current != 0) { /* non input consuming loop */
2783 buffer[nbchars++] = *current++;
2784 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2785 growBuffer(buffer);
2786 }
2787 }
2788 } else if (ent != NULL) {
2789 const xmlChar *cur = ent->name;
2790
2791 buffer[nbchars++] = '&';
2792 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2793 growBuffer(buffer);
2794 }
2795 while (*cur != 0) { /* non input consuming loop */
2796 buffer[nbchars++] = *cur++;
2797 }
2798 buffer[nbchars++] = ';';
2799 }
2800 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2801 /*
2802 * a PEReference induce to switch the entity flow,
2803 * we break here to flush the current set of chars
2804 * parsed if any. We will be called back later.
2805 */
2806 if (xmlParserDebugEntities)
2807 xmlGenericError(xmlGenericErrorContext,
2808 "decoding PE Reference\n");
2809 if (nbchars != 0) break;
2810
2811 xmlParsePEReference(ctxt);
2812
2813 /*
2814 * Pop-up of finished entities.
2815 */
2816 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2817 xmlPopInput(ctxt);
2818
2819 break;
2820 } else {
2821 COPY_BUF(l,buffer,nbchars,c);
2822 NEXTL(l);
2823 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2824 growBuffer(buffer);
2825 }
2826 }
2827 c = CUR_CHAR(l);
2828 }
2829 buffer[nbchars++] = 0;
2830 return(buffer);
2831#endif
2832 return(NULL);
2833}
2834
2835/**
2836 * xmlNamespaceParseNCName:
2837 * @ctxt: an XML parser context
2838 *
2839 * parse an XML namespace name.
2840 *
2841 * TODO: this seems not in use anymore, the namespace handling is done on
2842 * top of the SAX interfaces, i.e. not on raw input.
2843 *
2844 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2845 *
2846 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2847 * CombiningChar | Extender
2848 *
2849 * Returns the namespace name or NULL
2850 */
2851
2852xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002853xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002854#if 0
2855 xmlChar buf[XML_MAX_NAMELEN + 5];
2856 int len = 0, l;
2857 int cur = CUR_CHAR(l);
2858#endif
2859
2860 static int deprecated = 0;
2861 if (!deprecated) {
2862 xmlGenericError(xmlGenericErrorContext,
2863 "xmlNamespaceParseNCName() deprecated function reached\n");
2864 deprecated = 1;
2865 }
2866
2867#if 0
2868 /* load first the value of the char !!! */
2869 GROW;
2870 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2871
2872xmlGenericError(xmlGenericErrorContext,
2873 "xmlNamespaceParseNCName: reached loop 3\n");
2874 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2875 (cur == '.') || (cur == '-') ||
2876 (cur == '_') ||
2877 (IS_COMBINING(cur)) ||
2878 (IS_EXTENDER(cur))) {
2879 COPY_BUF(l,buf,len,cur);
2880 NEXTL(l);
2881 cur = CUR_CHAR(l);
2882 if (len >= XML_MAX_NAMELEN) {
2883 xmlGenericError(xmlGenericErrorContext,
2884 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2885 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2886 (cur == '.') || (cur == '-') ||
2887 (cur == '_') ||
2888 (IS_COMBINING(cur)) ||
2889 (IS_EXTENDER(cur))) {
2890 NEXTL(l);
2891 cur = CUR_CHAR(l);
2892 }
2893 break;
2894 }
2895 }
2896 return(xmlStrndup(buf, len));
2897#endif
2898 return(NULL);
2899}
2900
2901/**
2902 * xmlNamespaceParseQName:
2903 * @ctxt: an XML parser context
2904 * @prefix: a xmlChar **
2905 *
2906 * TODO: this seems not in use anymore, the namespace handling is done on
2907 * top of the SAX interfaces, i.e. not on raw input.
2908 *
2909 * parse an XML qualified name
2910 *
2911 * [NS 5] QName ::= (Prefix ':')? LocalPart
2912 *
2913 * [NS 6] Prefix ::= NCName
2914 *
2915 * [NS 7] LocalPart ::= NCName
2916 *
2917 * Returns the local part, and prefix is updated
2918 * to get the Prefix if any.
2919 */
2920
2921xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002922xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002923
2924 static int deprecated = 0;
2925 if (!deprecated) {
2926 xmlGenericError(xmlGenericErrorContext,
2927 "xmlNamespaceParseQName() deprecated function reached\n");
2928 deprecated = 1;
2929 }
2930
2931#if 0
2932 xmlChar *ret = NULL;
2933
2934 *prefix = NULL;
2935 ret = xmlNamespaceParseNCName(ctxt);
2936 if (RAW == ':') {
2937 *prefix = ret;
2938 NEXT;
2939 ret = xmlNamespaceParseNCName(ctxt);
2940 }
2941
2942 return(ret);
2943#endif
2944 return(NULL);
2945}
2946
2947/**
2948 * xmlNamespaceParseNSDef:
2949 * @ctxt: an XML parser context
2950 *
2951 * parse a namespace prefix declaration
2952 *
2953 * TODO: this seems not in use anymore, the namespace handling is done on
2954 * top of the SAX interfaces, i.e. not on raw input.
2955 *
2956 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2957 *
2958 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2959 *
2960 * Returns the namespace name
2961 */
2962
2963xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002964xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002965 static int deprecated = 0;
2966 if (!deprecated) {
2967 xmlGenericError(xmlGenericErrorContext,
2968 "xmlNamespaceParseNSDef() deprecated function reached\n");
2969 deprecated = 1;
2970 }
2971 return(NULL);
2972#if 0
2973 xmlChar *name = NULL;
2974
2975 if ((RAW == 'x') && (NXT(1) == 'm') &&
2976 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2977 (NXT(4) == 's')) {
2978 SKIP(5);
2979 if (RAW == ':') {
2980 NEXT;
2981 name = xmlNamespaceParseNCName(ctxt);
2982 }
2983 }
2984 return(name);
2985#endif
2986}
2987
2988/**
2989 * xmlParseQuotedString:
2990 * @ctxt: an XML parser context
2991 *
2992 * Parse and return a string between quotes or doublequotes
2993 *
2994 * TODO: Deprecated, to be removed at next drop of binary compatibility
2995 *
2996 * Returns the string parser or NULL.
2997 */
2998xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002999xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003000 static int deprecated = 0;
3001 if (!deprecated) {
3002 xmlGenericError(xmlGenericErrorContext,
3003 "xmlParseQuotedString() deprecated function reached\n");
3004 deprecated = 1;
3005 }
3006 return(NULL);
3007
3008#if 0
3009 xmlChar *buf = NULL;
3010 int len = 0,l;
3011 int size = XML_PARSER_BUFFER_SIZE;
3012 int c;
3013
3014 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3015 if (buf == NULL) {
3016 xmlGenericError(xmlGenericErrorContext,
3017 "malloc of %d byte failed\n", size);
3018 return(NULL);
3019 }
3020xmlGenericError(xmlGenericErrorContext,
3021 "xmlParseQuotedString: reached loop 4\n");
3022 if (RAW == '"') {
3023 NEXT;
3024 c = CUR_CHAR(l);
3025 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3026 if (len + 5 >= size) {
3027 size *= 2;
3028 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3029 if (buf == NULL) {
3030 xmlGenericError(xmlGenericErrorContext,
3031 "realloc of %d byte failed\n", size);
3032 return(NULL);
3033 }
3034 }
3035 COPY_BUF(l,buf,len,c);
3036 NEXTL(l);
3037 c = CUR_CHAR(l);
3038 }
3039 if (c != '"') {
3040 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3042 ctxt->sax->error(ctxt->userData,
3043 "String not closed \"%.50s\"\n", buf);
3044 ctxt->wellFormed = 0;
3045 ctxt->disableSAX = 1;
3046 } else {
3047 NEXT;
3048 }
3049 } else if (RAW == '\''){
3050 NEXT;
3051 c = CUR;
3052 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3053 if (len + 1 >= size) {
3054 size *= 2;
3055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3056 if (buf == NULL) {
3057 xmlGenericError(xmlGenericErrorContext,
3058 "realloc of %d byte failed\n", size);
3059 return(NULL);
3060 }
3061 }
3062 buf[len++] = c;
3063 NEXT;
3064 c = CUR;
3065 }
3066 if (RAW != '\'') {
3067 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3069 ctxt->sax->error(ctxt->userData,
3070 "String not closed \"%.50s\"\n", buf);
3071 ctxt->wellFormed = 0;
3072 ctxt->disableSAX = 1;
3073 } else {
3074 NEXT;
3075 }
3076 }
3077 return(buf);
3078#endif
3079}
3080
3081/**
3082 * xmlParseNamespace:
3083 * @ctxt: an XML parser context
3084 *
3085 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3086 *
3087 * This is what the older xml-name Working Draft specified, a bunch of
3088 * other stuff may still rely on it, so support is still here as
3089 * if it was declared on the root of the Tree:-(
3090 *
3091 * TODO: remove from library
3092 *
3093 * To be removed at next drop of binary compatibility
3094 */
3095
3096void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003097xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003098 static int deprecated = 0;
3099 if (!deprecated) {
3100 xmlGenericError(xmlGenericErrorContext,
3101 "xmlParseNamespace() deprecated function reached\n");
3102 deprecated = 1;
3103 }
3104
3105#if 0
3106 xmlChar *href = NULL;
3107 xmlChar *prefix = NULL;
3108 int garbage = 0;
3109
3110 /*
3111 * We just skipped "namespace" or "xml:namespace"
3112 */
3113 SKIP_BLANKS;
3114
3115xmlGenericError(xmlGenericErrorContext,
3116 "xmlParseNamespace: reached loop 5\n");
3117 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3118 /*
3119 * We can have "ns" or "prefix" attributes
3120 * Old encoding as 'href' or 'AS' attributes is still supported
3121 */
3122 if ((RAW == 'n') && (NXT(1) == 's')) {
3123 garbage = 0;
3124 SKIP(2);
3125 SKIP_BLANKS;
3126
3127 if (RAW != '=') continue;
3128 NEXT;
3129 SKIP_BLANKS;
3130
3131 href = xmlParseQuotedString(ctxt);
3132 SKIP_BLANKS;
3133 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3134 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3135 garbage = 0;
3136 SKIP(4);
3137 SKIP_BLANKS;
3138
3139 if (RAW != '=') continue;
3140 NEXT;
3141 SKIP_BLANKS;
3142
3143 href = xmlParseQuotedString(ctxt);
3144 SKIP_BLANKS;
3145 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3146 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3147 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3148 garbage = 0;
3149 SKIP(6);
3150 SKIP_BLANKS;
3151
3152 if (RAW != '=') continue;
3153 NEXT;
3154 SKIP_BLANKS;
3155
3156 prefix = xmlParseQuotedString(ctxt);
3157 SKIP_BLANKS;
3158 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3159 garbage = 0;
3160 SKIP(2);
3161 SKIP_BLANKS;
3162
3163 if (RAW != '=') continue;
3164 NEXT;
3165 SKIP_BLANKS;
3166
3167 prefix = xmlParseQuotedString(ctxt);
3168 SKIP_BLANKS;
3169 } else if ((RAW == '?') && (NXT(1) == '>')) {
3170 garbage = 0;
3171 NEXT;
3172 } else {
3173 /*
3174 * Found garbage when parsing the namespace
3175 */
3176 if (!garbage) {
3177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3178 ctxt->sax->error(ctxt->userData,
3179 "xmlParseNamespace found garbage\n");
3180 }
3181 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3182 ctxt->wellFormed = 0;
3183 ctxt->disableSAX = 1;
3184 NEXT;
3185 }
3186 }
3187
3188 MOVETO_ENDTAG(CUR_PTR);
3189 NEXT;
3190
3191 /*
3192 * Register the DTD.
3193 if (href != NULL)
3194 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3195 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3196 */
3197
3198 if (prefix != NULL) xmlFree(prefix);
3199 if (href != NULL) xmlFree(href);
3200#endif
3201}
3202
3203/**
3204 * xmlScanName:
3205 * @ctxt: an XML parser context
3206 *
3207 * Trickery: parse an XML name but without consuming the input flow
3208 * Needed for rollback cases. Used only when parsing entities references.
3209 *
3210 * TODO: seems deprecated now, only used in the default part of
3211 * xmlParserHandleReference
3212 *
3213 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3214 * CombiningChar | Extender
3215 *
3216 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3217 *
3218 * [6] Names ::= Name (S Name)*
3219 *
3220 * Returns the Name parsed or NULL
3221 */
3222
3223xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003224xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003225 static int deprecated = 0;
3226 if (!deprecated) {
3227 xmlGenericError(xmlGenericErrorContext,
3228 "xmlScanName() deprecated function reached\n");
3229 deprecated = 1;
3230 }
3231 return(NULL);
3232
3233#if 0
3234 xmlChar buf[XML_MAX_NAMELEN];
3235 int len = 0;
3236
3237 GROW;
3238 if (!IS_LETTER(RAW) && (RAW != '_') &&
3239 (RAW != ':')) {
3240 return(NULL);
3241 }
3242
3243
3244 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3245 (NXT(len) == '.') || (NXT(len) == '-') ||
3246 (NXT(len) == '_') || (NXT(len) == ':') ||
3247 (IS_COMBINING(NXT(len))) ||
3248 (IS_EXTENDER(NXT(len)))) {
3249 GROW;
3250 buf[len] = NXT(len);
3251 len++;
3252 if (len >= XML_MAX_NAMELEN) {
3253 xmlGenericError(xmlGenericErrorContext,
3254 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3255 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3256 (IS_DIGIT(NXT(len))) ||
3257 (NXT(len) == '.') || (NXT(len) == '-') ||
3258 (NXT(len) == '_') || (NXT(len) == ':') ||
3259 (IS_COMBINING(NXT(len))) ||
3260 (IS_EXTENDER(NXT(len))))
3261 len++;
3262 break;
3263 }
3264 }
3265 return(xmlStrndup(buf, len));
3266#endif
3267}
3268
3269/**
3270 * xmlParserHandleReference:
3271 * @ctxt: the parser context
3272 *
3273 * TODO: Remove, now deprecated ... the test is done directly in the
3274 * content parsing
3275 * routines.
3276 *
3277 * [67] Reference ::= EntityRef | CharRef
3278 *
3279 * [68] EntityRef ::= '&' Name ';'
3280 *
3281 * [ WFC: Entity Declared ]
3282 * the Name given in the entity reference must match that in an entity
3283 * declaration, except that well-formed documents need not declare any
3284 * of the following entities: amp, lt, gt, apos, quot.
3285 *
3286 * [ WFC: Parsed Entity ]
3287 * An entity reference must not contain the name of an unparsed entity
3288 *
3289 * [66] CharRef ::= '&#' [0-9]+ ';' |
3290 * '&#x' [0-9a-fA-F]+ ';'
3291 *
3292 * A PEReference may have been detectect in the current input stream
3293 * the handling is done accordingly to
3294 * http://www.w3.org/TR/REC-xml#entproc
3295 */
3296void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003297xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003298 static int deprecated = 0;
3299 if (!deprecated) {
3300 xmlGenericError(xmlGenericErrorContext,
3301 "xmlParserHandleReference() deprecated function reached\n");
3302 deprecated = 1;
3303 }
3304
3305#if 0
3306 xmlParserInputPtr input;
3307 xmlChar *name;
3308 xmlEntityPtr ent = NULL;
3309
3310 if (ctxt->token != 0) {
3311 return;
3312 }
3313 if (RAW != '&') return;
3314 GROW;
3315 if ((RAW == '&') && (NXT(1) == '#')) {
3316 switch(ctxt->instate) {
3317 case XML_PARSER_ENTITY_DECL:
3318 case XML_PARSER_PI:
3319 case XML_PARSER_CDATA_SECTION:
3320 case XML_PARSER_COMMENT:
3321 case XML_PARSER_SYSTEM_LITERAL:
3322 /* we just ignore it there */
3323 return;
3324 case XML_PARSER_START_TAG:
3325 return;
3326 case XML_PARSER_END_TAG:
3327 return;
3328 case XML_PARSER_EOF:
3329 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3332 ctxt->wellFormed = 0;
3333 ctxt->disableSAX = 1;
3334 return;
3335 case XML_PARSER_PROLOG:
3336 case XML_PARSER_START:
3337 case XML_PARSER_MISC:
3338 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3341 ctxt->wellFormed = 0;
3342 ctxt->disableSAX = 1;
3343 return;
3344 case XML_PARSER_EPILOG:
3345 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3347 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3348 ctxt->wellFormed = 0;
3349 ctxt->disableSAX = 1;
3350 return;
3351 case XML_PARSER_DTD:
3352 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3353 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3354 ctxt->sax->error(ctxt->userData,
3355 "CharRef are forbiden in DTDs!\n");
3356 ctxt->wellFormed = 0;
3357 ctxt->disableSAX = 1;
3358 return;
3359 case XML_PARSER_ENTITY_VALUE:
3360 /*
3361 * NOTE: in the case of entity values, we don't do the
3362 * substitution here since we need the literal
3363 * entity value to be able to save the internal
3364 * subset of the document.
3365 * This will be handled by xmlStringDecodeEntities
3366 */
3367 return;
3368 case XML_PARSER_CONTENT:
3369 return;
3370 case XML_PARSER_ATTRIBUTE_VALUE:
3371 /* ctxt->token = xmlParseCharRef(ctxt); */
3372 return;
3373 case XML_PARSER_IGNORE:
3374 return;
3375 }
3376 return;
3377 }
3378
3379 switch(ctxt->instate) {
3380 case XML_PARSER_CDATA_SECTION:
3381 return;
3382 case XML_PARSER_PI:
3383 case XML_PARSER_COMMENT:
3384 case XML_PARSER_SYSTEM_LITERAL:
3385 case XML_PARSER_CONTENT:
3386 return;
3387 case XML_PARSER_START_TAG:
3388 return;
3389 case XML_PARSER_END_TAG:
3390 return;
3391 case XML_PARSER_EOF:
3392 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3394 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3395 ctxt->wellFormed = 0;
3396 ctxt->disableSAX = 1;
3397 return;
3398 case XML_PARSER_PROLOG:
3399 case XML_PARSER_START:
3400 case XML_PARSER_MISC:
3401 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3403 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3404 ctxt->wellFormed = 0;
3405 ctxt->disableSAX = 1;
3406 return;
3407 case XML_PARSER_EPILOG:
3408 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3410 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3411 ctxt->wellFormed = 0;
3412 ctxt->disableSAX = 1;
3413 return;
3414 case XML_PARSER_ENTITY_VALUE:
3415 /*
3416 * NOTE: in the case of entity values, we don't do the
3417 * substitution here since we need the literal
3418 * entity value to be able to save the internal
3419 * subset of the document.
3420 * This will be handled by xmlStringDecodeEntities
3421 */
3422 return;
3423 case XML_PARSER_ATTRIBUTE_VALUE:
3424 /*
3425 * NOTE: in the case of attributes values, we don't do the
3426 * substitution here unless we are in a mode where
3427 * the parser is explicitely asked to substitute
3428 * entities. The SAX callback is called with values
3429 * without entity substitution.
3430 * This will then be handled by xmlStringDecodeEntities
3431 */
3432 return;
3433 case XML_PARSER_ENTITY_DECL:
3434 /*
3435 * we just ignore it there
3436 * the substitution will be done once the entity is referenced
3437 */
3438 return;
3439 case XML_PARSER_DTD:
3440 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3442 ctxt->sax->error(ctxt->userData,
3443 "Entity references are forbiden in DTDs!\n");
3444 ctxt->wellFormed = 0;
3445 ctxt->disableSAX = 1;
3446 return;
3447 case XML_PARSER_IGNORE:
3448 return;
3449 }
3450
3451/* TODO: this seems not reached anymore .... Verify ... */
3452xmlGenericError(xmlGenericErrorContext,
3453 "Reached deprecated section in xmlParserHandleReference()\n");
3454xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003455 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003456xmlGenericError(xmlGenericErrorContext,
3457 "indicating the version: %s, thanks !\n", xmlParserVersion);
3458 NEXT;
3459 name = xmlScanName(ctxt);
3460 if (name == NULL) {
3461 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3463 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3464 ctxt->wellFormed = 0;
3465 ctxt->disableSAX = 1;
3466 ctxt->token = '&';
3467 return;
3468 }
3469 if (NXT(xmlStrlen(name)) != ';') {
3470 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3472 ctxt->sax->error(ctxt->userData,
3473 "Entity reference: ';' expected\n");
3474 ctxt->wellFormed = 0;
3475 ctxt->disableSAX = 1;
3476 ctxt->token = '&';
3477 xmlFree(name);
3478 return;
3479 }
3480 SKIP(xmlStrlen(name) + 1);
3481 if (ctxt->sax != NULL) {
3482 if (ctxt->sax->getEntity != NULL)
3483 ent = ctxt->sax->getEntity(ctxt->userData, name);
3484 }
3485
3486 /*
3487 * [ WFC: Entity Declared ]
3488 * the Name given in the entity reference must match that in an entity
3489 * declaration, except that well-formed documents need not declare any
3490 * of the following entities: amp, lt, gt, apos, quot.
3491 */
3492 if (ent == NULL)
3493 ent = xmlGetPredefinedEntity(name);
3494 if (ent == NULL) {
3495 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3497 ctxt->sax->error(ctxt->userData,
3498 "Entity reference: entity %s not declared\n",
3499 name);
3500 ctxt->wellFormed = 0;
3501 ctxt->disableSAX = 1;
3502 xmlFree(name);
3503 return;
3504 }
3505
3506 /*
3507 * [ WFC: Parsed Entity ]
3508 * An entity reference must not contain the name of an unparsed entity
3509 */
3510 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3511 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3513 ctxt->sax->error(ctxt->userData,
3514 "Entity reference to unparsed entity %s\n", name);
3515 ctxt->wellFormed = 0;
3516 ctxt->disableSAX = 1;
3517 }
3518
3519 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3520 ctxt->token = ent->content[0];
3521 xmlFree(name);
3522 return;
3523 }
3524 input = xmlNewEntityInputStream(ctxt, ent);
3525 xmlPushInput(ctxt, input);
3526 xmlFree(name);
3527#endif
3528 return;
3529}
3530
3531/**
3532 * xmlHandleEntity:
3533 * @ctxt: an XML parser context
3534 * @entity: an XML entity pointer.
3535 *
3536 * Default handling of defined entities, when should we define a new input
3537 * stream ? When do we just handle that as a set of chars ?
3538 *
3539 * OBSOLETE: to be removed at some point.
3540 */
3541
3542void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003543xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003544 static int deprecated = 0;
3545 if (!deprecated) {
3546 xmlGenericError(xmlGenericErrorContext,
3547 "xmlHandleEntity() deprecated function reached\n");
3548 deprecated = 1;
3549 }
3550
3551#if 0
3552 int len;
3553 xmlParserInputPtr input;
3554
3555 if (entity->content == NULL) {
3556 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3559 entity->name);
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 return;
3563 }
3564 len = xmlStrlen(entity->content);
3565 if (len <= 2) goto handle_as_char;
3566
3567 /*
3568 * Redefine its content as an input stream.
3569 */
3570 input = xmlNewEntityInputStream(ctxt, entity);
3571 xmlPushInput(ctxt, input);
3572 return;
3573
3574handle_as_char:
3575 /*
3576 * Just handle the content as a set of chars.
3577 */
3578 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3579 (ctxt->sax->characters != NULL))
3580 ctxt->sax->characters(ctxt->userData, entity->content, len);
3581#endif
3582}
3583
3584/**
3585 * xmlNewGlobalNs:
3586 * @doc: the document carrying the namespace
3587 * @href: the URI associated
3588 * @prefix: the prefix for the namespace
3589 *
3590 * Creation of a Namespace, the old way using PI and without scoping
3591 * DEPRECATED !!!
3592 * It now create a namespace on the root element of the document if found.
3593 * Returns NULL this functionnality had been removed
3594 */
3595xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003596xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3597 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003598 static int deprecated = 0;
3599 if (!deprecated) {
3600 xmlGenericError(xmlGenericErrorContext,
3601 "xmlNewGlobalNs() deprecated function reached\n");
3602 deprecated = 1;
3603 }
3604 return(NULL);
3605#if 0
3606 xmlNodePtr root;
3607
3608 xmlNsPtr cur;
3609
3610 root = xmlDocGetRootElement(doc);
3611 if (root != NULL)
3612 return(xmlNewNs(root, href, prefix));
3613
3614 /*
3615 * if there is no root element yet, create an old Namespace type
3616 * and it will be moved to the root at save time.
3617 */
3618 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3619 if (cur == NULL) {
3620 xmlGenericError(xmlGenericErrorContext,
3621 "xmlNewGlobalNs : malloc failed\n");
3622 return(NULL);
3623 }
3624 memset(cur, 0, sizeof(xmlNs));
3625 cur->type = XML_GLOBAL_NAMESPACE;
3626
3627 if (href != NULL)
3628 cur->href = xmlStrdup(href);
3629 if (prefix != NULL)
3630 cur->prefix = xmlStrdup(prefix);
3631
3632 /*
3633 * Add it at the end to preserve parsing order ...
3634 */
3635 if (doc != NULL) {
3636 if (doc->oldNs == NULL) {
3637 doc->oldNs = cur;
3638 } else {
3639 xmlNsPtr prev = doc->oldNs;
3640
3641 while (prev->next != NULL) prev = prev->next;
3642 prev->next = cur;
3643 }
3644 }
3645
3646 return(NULL);
3647#endif
3648}
3649
3650/**
3651 * xmlUpgradeOldNs:
3652 * @doc: a document pointer
3653 *
3654 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3655 * DEPRECATED
3656 */
3657void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003658xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003659 static int deprecated = 0;
3660 if (!deprecated) {
3661 xmlGenericError(xmlGenericErrorContext,
3662 "xmlNewGlobalNs() deprecated function reached\n");
3663 deprecated = 1;
3664 }
3665#if 0
3666 xmlNsPtr cur;
3667
3668 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3669 if (doc->children == NULL) {
3670#ifdef DEBUG_TREE
3671 xmlGenericError(xmlGenericErrorContext,
3672 "xmlUpgradeOldNs: failed no root !\n");
3673#endif
3674 return;
3675 }
3676
3677 cur = doc->oldNs;
3678 while (cur->next != NULL) {
3679 cur->type = XML_LOCAL_NAMESPACE;
3680 cur = cur->next;
3681 }
3682 cur->type = XML_LOCAL_NAMESPACE;
3683 cur->next = doc->children->nsDef;
3684 doc->children->nsDef = doc->oldNs;
3685 doc->oldNs = NULL;
3686#endif
3687}
3688