blob: 9a570c91076f5256e26f6709d25ce2314113c70e [file] [log] [blame]
/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Daniel Veillard3c5ed912002-01-08 10:36:16 +000012#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000053#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000054
Daniel Veillard56a4cb82001-03-24 17:00:36 +000055void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000056
/*
 * Global defaults used when parsing.
 *
 * Only when building for VMS: the two default-value globals are defined
 * here under shortened names, and the regular long names are mapped onto
 * them with macros (presumably to stay within that platform's limit on
 * external symbol length -- TODO confirm).
 */
#ifdef VMS
int xmlSubstituteEntitiesDefaultVal = 0;
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066
Daniel Veillard5e2dace2001-07-18 19:30:27 +000067/**
Owen Taylor3473f882001-02-23 17:55:21 +000068 * xmlCheckVersion:
69 * @version: the include version number
70 *
71 * check the compiled lib version against the include one.
72 * This can warn or immediately kill the application
73 */
74void
75xmlCheckVersion(int version) {
76 int myversion = (int) LIBXML_VERSION;
77
Daniel Veillard6f350292001-10-14 09:56:15 +000078 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000079
Owen Taylor3473f882001-02-23 17:55:21 +000080 if ((myversion / 10000) != (version / 10000)) {
81 xmlGenericError(xmlGenericErrorContext,
82 "Fatal: program compiled against libxml %d using libxml %d\n",
83 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000084 fprintf(stderr,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000087 }
88 if ((myversion / 100) < (version / 100)) {
89 xmlGenericError(xmlGenericErrorContext,
90 "Warning: program compiled against libxml %d using older %d\n",
91 (version / 100), (myversion / 100));
92 }
93}
94
95
/*
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() reports them.
 */
static const char *xmlFeaturesList[] = {
    /* parser options and state */
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  on input the capacity of @result, on output the number of
 *        names stored (only rewritten when it had to be reduced)
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.  The stored
 * pointers refer to static strings and must not be deallocated.
 *
 * Returns the total number of features; this is also what is returned,
 * without storing anything, when @len or @result is NULL.  Returns -1
 * if *@len is negative or is 1000 or more.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* Never hand out more names than exist; tell the caller if clamped. */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    idx = 0;
    while (idx < wanted) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
167
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000168/**
Owen Taylor3473f882001-02-23 17:55:21 +0000169 * xmlGetFeature:
170 * @ctxt: an XML/HTML parser context
171 * @name: the feature name
172 * @result: location to store the result
173 *
174 * Read the current value of one feature of this parser instance
175 *
176 * Returns -1 in case or error, 0 otherwise
177 */
178int
179xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181 return(-1);
182
183 if (!strcmp(name, "validate")) {
184 *((int *) result) = ctxt->validate;
185 } else if (!strcmp(name, "keep blanks")) {
186 *((int *) result) = ctxt->keepBlanks;
187 } else if (!strcmp(name, "disable SAX")) {
188 *((int *) result) = ctxt->disableSAX;
189 } else if (!strcmp(name, "fetch external entities")) {
190 *((int *) result) = ctxt->loadsubset;
191 } else if (!strcmp(name, "substitute entities")) {
192 *((int *) result) = ctxt->replaceEntities;
193 } else if (!strcmp(name, "gather line info")) {
194 *((int *) result) = ctxt->record_info;
195 } else if (!strcmp(name, "user data")) {
196 *((void **)result) = ctxt->userData;
197 } else if (!strcmp(name, "is html")) {
198 *((int *) result) = ctxt->html;
199 } else if (!strcmp(name, "is standalone")) {
200 *((int *) result) = ctxt->standalone;
201 } else if (!strcmp(name, "document")) {
202 *((xmlDocPtr *) result) = ctxt->myDoc;
203 } else if (!strcmp(name, "is well formed")) {
204 *((int *) result) = ctxt->wellFormed;
205 } else if (!strcmp(name, "is valid")) {
206 *((int *) result) = ctxt->valid;
207 } else if (!strcmp(name, "SAX block")) {
208 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209 } else if (!strcmp(name, "SAX function internalSubset")) {
210 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211 } else if (!strcmp(name, "SAX function isStandalone")) {
212 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217 } else if (!strcmp(name, "SAX function resolveEntity")) {
218 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219 } else if (!strcmp(name, "SAX function getEntity")) {
220 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221 } else if (!strcmp(name, "SAX function entityDecl")) {
222 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223 } else if (!strcmp(name, "SAX function notationDecl")) {
224 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225 } else if (!strcmp(name, "SAX function attributeDecl")) {
226 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227 } else if (!strcmp(name, "SAX function elementDecl")) {
228 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233 } else if (!strcmp(name, "SAX function startDocument")) {
234 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235 } else if (!strcmp(name, "SAX function endDocument")) {
236 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237 } else if (!strcmp(name, "SAX function startElement")) {
238 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239 } else if (!strcmp(name, "SAX function endElement")) {
240 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241 } else if (!strcmp(name, "SAX function reference")) {
242 *((referenceSAXFunc *) result) = ctxt->sax->reference;
243 } else if (!strcmp(name, "SAX function characters")) {
244 *((charactersSAXFunc *) result) = ctxt->sax->characters;
245 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247 } else if (!strcmp(name, "SAX function processingInstruction")) {
248 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249 } else if (!strcmp(name, "SAX function comment")) {
250 *((commentSAXFunc *) result) = ctxt->sax->comment;
251 } else if (!strcmp(name, "SAX function warning")) {
252 *((warningSAXFunc *) result) = ctxt->sax->warning;
253 } else if (!strcmp(name, "SAX function error")) {
254 *((errorSAXFunc *) result) = ctxt->sax->error;
255 } else if (!strcmp(name, "SAX function fatalError")) {
256 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257 } else if (!strcmp(name, "SAX function getParameterEntity")) {
258 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259 } else if (!strcmp(name, "SAX function cdataBlock")) {
260 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261 } else if (!strcmp(name, "SAX function externalSubset")) {
262 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263 } else {
264 return(-1);
265 }
266 return(0);
267}
268
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000269/**
Owen Taylor3473f882001-02-23 17:55:21 +0000270 * xmlSetFeature:
271 * @ctxt: an XML/HTML parser context
272 * @name: the feature name
273 * @value: pointer to the location of the new value
274 *
275 * Change the current value of one feature of this parser instance
276 *
277 * Returns -1 in case or error, 0 otherwise
278 */
279int
280xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282 return(-1);
283
284 if (!strcmp(name, "validate")) {
285 int newvalidate = *((int *) value);
286 if ((!ctxt->validate) && (newvalidate != 0)) {
287 if (ctxt->vctxt.warning == NULL)
288 ctxt->vctxt.warning = xmlParserValidityWarning;
289 if (ctxt->vctxt.error == NULL)
290 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000291 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000292 }
293 ctxt->validate = newvalidate;
294 } else if (!strcmp(name, "keep blanks")) {
295 ctxt->keepBlanks = *((int *) value);
296 } else if (!strcmp(name, "disable SAX")) {
297 ctxt->disableSAX = *((int *) value);
298 } else if (!strcmp(name, "fetch external entities")) {
299 ctxt->loadsubset = *((int *) value);
300 } else if (!strcmp(name, "substitute entities")) {
301 ctxt->replaceEntities = *((int *) value);
302 } else if (!strcmp(name, "gather line info")) {
303 ctxt->record_info = *((int *) value);
304 } else if (!strcmp(name, "user data")) {
305 ctxt->userData = *((void **)value);
306 } else if (!strcmp(name, "is html")) {
307 ctxt->html = *((int *) value);
308 } else if (!strcmp(name, "is standalone")) {
309 ctxt->standalone = *((int *) value);
310 } else if (!strcmp(name, "document")) {
311 ctxt->myDoc = *((xmlDocPtr *) value);
312 } else if (!strcmp(name, "is well formed")) {
313 ctxt->wellFormed = *((int *) value);
314 } else if (!strcmp(name, "is valid")) {
315 ctxt->valid = *((int *) value);
316 } else if (!strcmp(name, "SAX block")) {
317 ctxt->sax = *((xmlSAXHandlerPtr *) value);
318 } else if (!strcmp(name, "SAX function internalSubset")) {
319 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function isStandalone")) {
321 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
323 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
325 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function resolveEntity")) {
327 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function getEntity")) {
329 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
330 } else if (!strcmp(name, "SAX function entityDecl")) {
331 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function notationDecl")) {
333 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function attributeDecl")) {
335 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function elementDecl")) {
337 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
339 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
341 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startDocument")) {
343 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endDocument")) {
345 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function startElement")) {
347 ctxt->sax->startElement = *((startElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function endElement")) {
349 ctxt->sax->endElement = *((endElementSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function reference")) {
351 ctxt->sax->reference = *((referenceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function characters")) {
353 ctxt->sax->characters = *((charactersSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
355 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function processingInstruction")) {
357 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function comment")) {
359 ctxt->sax->comment = *((commentSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function warning")) {
361 ctxt->sax->warning = *((warningSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function error")) {
363 ctxt->sax->error = *((errorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function fatalError")) {
365 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function getParameterEntity")) {
367 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
368 } else if (!strcmp(name, "SAX function cdataBlock")) {
369 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
370 } else if (!strcmp(name, "SAX function externalSubset")) {
371 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
372 } else {
373 return(-1);
374 }
375 return(0);
376}
377
/************************************************************************
 *									*
 *		Some functions to avoid too large macros		*
 *									*
 ************************************************************************/
383
/**
 * xmlIsChar:
 * @c:  a Unicode code point (int)
 *
 * Tell whether @c matches the production
 *   [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.  Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space character only TAB, LF and CR are allowed. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* Surrogate block. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF fall through and fail the supplementary-plane test. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
404
/**
 * xmlIsBlank:
 * @c:  a Unicode code point (int)
 *
 * Tell whether @c is one of the white space characters of the production
 *   [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
419
/*
 * Lookup table for the code points 0x00-0xFF: non-zero for those that are
 * BaseChar.  Used as a fast path by xmlIsBaseChar().
 */
static const int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered from xmlBaseArray; the rest is a
 * list of range tests split into three bands so that only the tests of
 * the band containing @c are evaluated.  The range tests were produced
 * mechanically from the production text, e.g. with vi:
 *   :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 *   :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Negative values are never BaseChar and are rejected before the table
 * is indexed.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* A negative value must not be used as an index into xmlBaseArray. */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* Band 1: 0x0100 - 0x0904 */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* Band 2: 0x0905 - 0x109F */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* Band 3: 0x10A0 and above */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
658
/**
 * xmlIsDigit:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Digit blocks other than the ASCII one, in increasing order. */
    static const struct {
        int first;
        int last;
    } blocks[] = {
        { 0x0660, 0x0669 },
        { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F },
        { 0x09E6, 0x09EF },
        { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF },
        { 0x0B66, 0x0B6F },
        { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F },
        { 0x0CE6, 0x0CEF },
        { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 },
        { 0x0ED0, 0x0ED9 },
        { 0x0F20, 0x0F29 },
    };
    unsigned int idx;

    /* Fast path: below the first non-ASCII block only '0'..'9' match. */
    if (c < 0x0660)
        return((c >= 0x0030) && (c <= 0x0039));

    for (idx = 0; idx < sizeof(blocks) / sizeof(blocks[0]); idx++) {
        if ((c >= blocks[idx].first) && (c <= blocks[idx].last))
            return(1);
    }
    return(0);
}
688
/**
 * xmlIsCombining:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character is allowed by the production
 *   [87] CombiningChar ::= ... long list see REC ...
 *
 * The list of ranges is cut into bands so that only the tests belonging
 * to the band containing @c are evaluated.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* Nothing below 0x0300 is a combining character. */
    if (c < 0x0300)
        return(0);

    /* Band 1: 0x0300 - 0x0900 */
    if (c < 0x0901)
        return(
            ((c >= 0x0300) && (c <= 0x0345)) ||
            ((c >= 0x0360) && (c <= 0x0361)) ||
            ((c >= 0x0483) && (c <= 0x0486)) ||
            ((c >= 0x0591) && (c <= 0x05A1)) ||
            ((c >= 0x05A3) && (c <= 0x05B9)) ||
            ((c >= 0x05BB) && (c <= 0x05BD)) ||
            (c == 0x05BF) ||
            ((c >= 0x05C1) && (c <= 0x05C2)) ||
            (c == 0x05C4) ||
            ((c >= 0x064B) && (c <= 0x0652)) ||
            (c == 0x0670) ||
            ((c >= 0x06D6) && (c <= 0x06DC)) ||
            ((c >= 0x06DD) && (c <= 0x06DF)) ||
            ((c >= 0x06E0) && (c <= 0x06E4)) ||
            ((c >= 0x06E7) && (c <= 0x06E8)) ||
            ((c >= 0x06EA) && (c <= 0x06ED)));

    /* Band 2: 0x0901 - 0x0A01 */
    if (c < 0x0A02)
        return(
            ((c >= 0x0901) && (c <= 0x0903)) ||
            (c == 0x093C) ||
            ((c >= 0x093E) && (c <= 0x094C)) ||
            (c == 0x094D) ||
            ((c >= 0x0951) && (c <= 0x0954)) ||
            ((c >= 0x0962) && (c <= 0x0963)) ||
            ((c >= 0x0981) && (c <= 0x0983)) ||
            (c == 0x09BC) ||
            (c == 0x09BE) ||
            (c == 0x09BF) ||
            ((c >= 0x09C0) && (c <= 0x09C4)) ||
            ((c >= 0x09C7) && (c <= 0x09C8)) ||
            ((c >= 0x09CB) && (c <= 0x09CD)) ||
            (c == 0x09D7) ||
            ((c >= 0x09E2) && (c <= 0x09E3)));

    /* Band 3: 0x0A02 - 0x0E30 */
    if (c < 0x0E31)
        return(
            (c == 0x0A02) ||
            (c == 0x0A3C) ||
            (c == 0x0A3E) ||
            (c == 0x0A3F) ||
            ((c >= 0x0A40) && (c <= 0x0A42)) ||
            ((c >= 0x0A47) && (c <= 0x0A48)) ||
            ((c >= 0x0A4B) && (c <= 0x0A4D)) ||
            ((c >= 0x0A70) && (c <= 0x0A71)) ||
            ((c >= 0x0A81) && (c <= 0x0A83)) ||
            (c == 0x0ABC) ||
            ((c >= 0x0ABE) && (c <= 0x0AC5)) ||
            ((c >= 0x0AC7) && (c <= 0x0AC9)) ||
            ((c >= 0x0ACB) && (c <= 0x0ACD)) ||
            ((c >= 0x0B01) && (c <= 0x0B03)) ||
            (c == 0x0B3C) ||
            ((c >= 0x0B3E) && (c <= 0x0B43)) ||
            ((c >= 0x0B47) && (c <= 0x0B48)) ||
            ((c >= 0x0B4B) && (c <= 0x0B4D)) ||
            ((c >= 0x0B56) && (c <= 0x0B57)) ||
            ((c >= 0x0B82) && (c <= 0x0B83)) ||
            ((c >= 0x0BBE) && (c <= 0x0BC2)) ||
            ((c >= 0x0BC6) && (c <= 0x0BC8)) ||
            ((c >= 0x0BCA) && (c <= 0x0BCD)) ||
            (c == 0x0BD7) ||
            ((c >= 0x0C01) && (c <= 0x0C03)) ||
            ((c >= 0x0C3E) && (c <= 0x0C44)) ||
            ((c >= 0x0C46) && (c <= 0x0C48)) ||
            ((c >= 0x0C4A) && (c <= 0x0C4D)) ||
            ((c >= 0x0C55) && (c <= 0x0C56)) ||
            ((c >= 0x0C82) && (c <= 0x0C83)) ||
            ((c >= 0x0CBE) && (c <= 0x0CC4)) ||
            ((c >= 0x0CC6) && (c <= 0x0CC8)) ||
            ((c >= 0x0CCA) && (c <= 0x0CCD)) ||
            ((c >= 0x0CD5) && (c <= 0x0CD6)) ||
            ((c >= 0x0D02) && (c <= 0x0D03)) ||
            ((c >= 0x0D3E) && (c <= 0x0D43)) ||
            ((c >= 0x0D46) && (c <= 0x0D48)) ||
            ((c >= 0x0D4A) && (c <= 0x0D4D)) ||
            (c == 0x0D57));

    /* Band 4: 0x0E31 and above */
    return(
        (c == 0x0E31) ||
        ((c >= 0x0E34) && (c <= 0x0E3A)) ||
        ((c >= 0x0E47) && (c <= 0x0E4E)) ||
        (c == 0x0EB1) ||
        ((c >= 0x0EB4) && (c <= 0x0EB9)) ||
        ((c >= 0x0EBB) && (c <= 0x0EBC)) ||
        ((c >= 0x0EC8) && (c <= 0x0ECD)) ||
        ((c >= 0x0F18) && (c <= 0x0F19)) ||
        (c == 0x0F35) ||
        (c == 0x0F37) ||
        (c == 0x0F39) ||
        (c == 0x0F3E) ||
        (c == 0x0F3F) ||
        ((c >= 0x0F71) && (c <= 0x0F84)) ||
        ((c >= 0x0F86) && (c <= 0x0F8B)) ||
        ((c >= 0x0F90) && (c <= 0x0F95)) ||
        (c == 0x0F97) ||
        ((c >= 0x0F99) && (c <= 0x0FAD)) ||
        ((c >= 0x0FB1) && (c <= 0x0FB7)) ||
        (c == 0x0FB9) ||
        ((c >= 0x20D0) && (c <= 0x20DC)) ||
        (c == 0x20E1) ||
        ((c >= 0x302A) && (c <= 0x302F)) ||
        (c == 0x3099) ||
        (c == 0x309A));
}
801
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE], all three code points of the range */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
826
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): besides the ranges of production [86], this routine also
 * accepts [#xF900-#xFA2D], which the production quoted above does not
 * list -- confirm whether that extension is intentional.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x0100 can match, bail out early */
    if (c < 0x0100)
        return(0);

    if ((c >= 0x4E00) && (c <= 0x9FA5))
        return(1);
    if ((c >= 0xF900) && (c <= 0xFA2D))
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);

    return(c == 0x3007);
}
844
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, 1 if the character is a BaseChar or an Ideographic
 */
int
xmlIsLetter(int c) {
    /* BaseChar is tested first, Ideographic only when that fails */
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
858
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* the three alphanumeric ranges [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* the isolated white space and punctuation characters */
    switch (c) {
    case 0x20: case 0x0D: case 0x0A:
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
881
882/************************************************************************
883 * *
884 * Input handling functions for progressive parsing *
885 * *
886 ************************************************************************/
887
888/* #define DEBUG_INPUT */
889/* #define DEBUG_STACK */
890/* #define DEBUG_PUSH */
891
892
893/* we need to keep enough input to show errors in context */
894#define LINE_LEN 80
895
896#ifdef DEBUG_INPUT
897#define CHECK_BUFFER(in) check_buffer(in)
898
899void check_buffer(xmlParserInputPtr in) {
900 if (in->base != in->buf->buffer->content) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: base mismatch problem\n");
903 }
904 if (in->cur < in->base) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur < base problem\n");
907 }
908 if (in->cur > in->base + in->buf->buffer->use) {
909 xmlGenericError(xmlGenericErrorContext,
910 "xmlParserInput: cur > base + use problem\n");
911 }
912 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
913 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
914 in->buf->buffer->use, in->buf->buffer->size);
915}
916
917#else
918#define CHECK_BUFFER(in)
919#endif
920
921
922/**
923 * xmlParserInputRead:
924 * @in: an XML parser input
925 * @len: an indicative size for the lookahead
926 *
927 * This function refresh the input for the parser. It doesn't try to
928 * preserve pointers to the input buffer, and discard already read data
929 *
930 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
931 * end of this entity
932 */
933int
934xmlParserInputRead(xmlParserInputPtr in, int len) {
935 int ret;
936 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000937 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000938
939#ifdef DEBUG_INPUT
940 xmlGenericError(xmlGenericErrorContext, "Read\n");
941#endif
942 if (in->buf == NULL) return(-1);
943 if (in->base == NULL) return(-1);
944 if (in->cur == NULL) return(-1);
945 if (in->buf->buffer == NULL) return(-1);
946 if (in->buf->readcallback == NULL) return(-1);
947
948 CHECK_BUFFER(in);
949
950 used = in->cur - in->buf->buffer->content;
951 ret = xmlBufferShrink(in->buf->buffer, used);
952 if (ret > 0) {
953 in->cur -= ret;
954 in->consumed += ret;
955 }
956 ret = xmlParserInputBufferRead(in->buf, len);
957 if (in->base != in->buf->buffer->content) {
958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000959 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000960 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000961 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000962 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000963 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000964 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000965 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000966
967 CHECK_BUFFER(in);
968
969 return(ret);
970}
971
972/**
973 * xmlParserInputGrow:
974 * @in: an XML parser input
975 * @len: an indicative size for the lookahead
976 *
977 * This function increase the input for the parser. It tries to
978 * preserve pointers to the input buffer, and keep already read data
979 *
980 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
981 * end of this entity
982 */
983int
984xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000986 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000987
988#ifdef DEBUG_INPUT
989 xmlGenericError(xmlGenericErrorContext, "Grow\n");
990#endif
991 if (in->buf == NULL) return(-1);
992 if (in->base == NULL) return(-1);
993 if (in->cur == NULL) return(-1);
994 if (in->buf->buffer == NULL) return(-1);
995
996 CHECK_BUFFER(in);
997
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000998 indx = in->cur - in->base;
999 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001000
1001 CHECK_BUFFER(in);
1002
1003 return(0);
1004 }
1005 if (in->buf->readcallback != NULL)
1006 ret = xmlParserInputBufferGrow(in->buf, len);
1007 else
1008 return(0);
1009
1010 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001011 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001012 * block, but we use it really as an integer to do some
1013 * pointer arithmetic. Insure will raise it as a bug but in
1014 * that specific case, that's not !
1015 */
1016 if (in->base != in->buf->buffer->content) {
1017 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001018 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001019 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001020 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001021 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001022 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001023 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001024 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001025
1026 CHECK_BUFFER(in);
1027
1028 return(ret);
1029}
1030
1031/**
1032 * xmlParserInputShrink:
1033 * @in: an XML parser input
1034 *
1035 * This function removes used input for the parser.
1036 */
1037void
1038xmlParserInputShrink(xmlParserInputPtr in) {
1039 int used;
1040 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001041 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001042
1043#ifdef DEBUG_INPUT
1044 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1045#endif
1046 if (in->buf == NULL) return;
1047 if (in->base == NULL) return;
1048 if (in->cur == NULL) return;
1049 if (in->buf->buffer == NULL) return;
1050
1051 CHECK_BUFFER(in);
1052
1053 used = in->cur - in->buf->buffer->content;
1054 /*
1055 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001056 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001057 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001058 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001059 return;
1060 if (used > INPUT_CHUNK) {
1061 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1062 if (ret > 0) {
1063 in->cur -= ret;
1064 in->consumed += ret;
1065 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001066 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001067 }
1068
1069 CHECK_BUFFER(in);
1070
1071 if (in->buf->buffer->use > INPUT_CHUNK) {
1072 return;
1073 }
1074 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1075 if (in->base != in->buf->buffer->content) {
1076 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001077 * the buffer has been ereallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001080 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001083 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001084
1085 CHECK_BUFFER(in);
1086}
1087
1088/************************************************************************
1089 * *
1090 * UTF8 character input and related functions *
1091 * *
1092 ************************************************************************/
1093
1094/**
1095 * xmlNextChar:
1096 * @ctxt: the XML parser context
1097 *
1098 * Skip to the next char input char.
1099 */
1100
1101void
1102xmlNextChar(xmlParserCtxtPtr ctxt) {
1103 if (ctxt->instate == XML_PARSER_EOF)
1104 return;
1105
1106 /*
1107 * 2.11 End-of-Line Handling
1108 * the literal two-character sequence "#xD#xA" or a standalone
1109 * literal #xD, an XML processor must pass to the application
1110 * the single character #xA.
1111 */
1112 if (ctxt->token != 0) ctxt->token = 0;
1113 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1114 if ((*ctxt->input->cur == 0) &&
1115 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1116 (ctxt->instate != XML_PARSER_COMMENT)) {
1117 /*
1118 * If we are at the end of the current entity and
1119 * the context allows it, we pop consumed entities
1120 * automatically.
1121 * the auto closing should be blocked in other cases
1122 */
1123 xmlPopInput(ctxt);
1124 } else {
1125 if (*(ctxt->input->cur) == '\n') {
1126 ctxt->input->line++; ctxt->input->col = 1;
1127 } else ctxt->input->col++;
1128 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 const unsigned char *cur = ctxt->input->cur;
1141 unsigned char c;
1142
1143 c = *cur;
1144 if (c & 0x80) {
1145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
1152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
1155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
1157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
1160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n", val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 ctxt->disableSAX = 1;
1185 }
1186 } else
1187 /* 2-byte code */
1188 ctxt->input->cur += 2;
1189 } else
1190 /* 1-byte code */
1191 ctxt->input->cur++;
1192 } else {
1193 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001194 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001195 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001196 * XML constructs only use < 128 chars
1197 */
1198 ctxt->input->cur++;
1199 }
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 } else {
1205 ctxt->input->cur++;
1206 ctxt->nbChars++;
1207 if (*ctxt->input->cur == 0)
1208 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1209 }
1210 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1211 xmlParserHandlePEReference(ctxt);
1212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
1215 return;
1216encoding_error:
1217 /*
1218 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001219 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001220 * declaration header. Report the error and switch the encoding
1221 * to ISO-Latin-1 (if you don't like this policy, just declare the
1222 * encoding !)
1223 */
1224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1225 ctxt->sax->error(ctxt->userData,
1226 "Input is not proper UTF-8, indicate encoding !\n");
1227 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1228 ctxt->input->cur[0], ctxt->input->cur[1],
1229 ctxt->input->cur[2], ctxt->input->cur[3]);
1230 }
1231 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1232
1233 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1234 ctxt->input->cur++;
1235 return;
1236}
1237
1238/**
1239 * xmlCurrentChar:
1240 * @ctxt: the XML parser context
1241 * @len: pointer to the length of the char read
1242 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001243 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001244 * bytes in the input buffer. Implement the end of line normalization:
1245 * 2.11 End-of-Line Handling
1246 * Wherever an external parsed entity or the literal entity value
1247 * of an internal parsed entity contains either the literal two-character
1248 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1249 * must pass to the application the single character #xA.
1250 * This behavior can conveniently be produced by normalizing all
1251 * line breaks to #xA on input, before parsing.)
1252 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001253 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001254 */
1255
1256int
1257xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1258 if (ctxt->instate == XML_PARSER_EOF)
1259 return(0);
1260
1261 if (ctxt->token != 0) {
1262 *len = 0;
1263 return(ctxt->token);
1264 }
1265 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1266 *len = 1;
1267 return((int) *ctxt->input->cur);
1268 }
1269 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1270 /*
1271 * We are supposed to handle UTF8, check it's valid
1272 * From rfc2044: encoding of the Unicode values on UTF-8:
1273 *
1274 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1275 * 0000 0000-0000 007F 0xxxxxxx
1276 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1277 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1278 *
1279 * Check for the 0x110000 limit too
1280 */
1281 const unsigned char *cur = ctxt->input->cur;
1282 unsigned char c;
1283 unsigned int val;
1284
1285 c = *cur;
1286 if (c & 0x80) {
1287 if (cur[1] == 0)
1288 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289 if ((cur[1] & 0xc0) != 0x80)
1290 goto encoding_error;
1291 if ((c & 0xe0) == 0xe0) {
1292
1293 if (cur[2] == 0)
1294 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1295 if ((cur[2] & 0xc0) != 0x80)
1296 goto encoding_error;
1297 if ((c & 0xf0) == 0xf0) {
1298 if (cur[3] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if (((c & 0xf8) != 0xf0) ||
1301 ((cur[3] & 0xc0) != 0x80))
1302 goto encoding_error;
1303 /* 4-byte code */
1304 *len = 4;
1305 val = (cur[0] & 0x7) << 18;
1306 val |= (cur[1] & 0x3f) << 12;
1307 val |= (cur[2] & 0x3f) << 6;
1308 val |= cur[3] & 0x3f;
1309 } else {
1310 /* 3-byte code */
1311 *len = 3;
1312 val = (cur[0] & 0xf) << 12;
1313 val |= (cur[1] & 0x3f) << 6;
1314 val |= cur[2] & 0x3f;
1315 }
1316 } else {
1317 /* 2-byte code */
1318 *len = 2;
1319 val = (cur[0] & 0x1f) << 6;
1320 val |= cur[1] & 0x3f;
1321 }
1322 if (!IS_CHAR(val)) {
1323 if ((ctxt->sax != NULL) &&
1324 (ctxt->sax->error != NULL))
1325 ctxt->sax->error(ctxt->userData,
1326 "Char 0x%X out of allowed range\n", val);
1327 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1328 ctxt->wellFormed = 0;
1329 ctxt->disableSAX = 1;
1330 }
1331 return(val);
1332 } else {
1333 /* 1-byte code */
1334 *len = 1;
1335 if (*ctxt->input->cur == 0xD) {
1336 if (ctxt->input->cur[1] == 0xA) {
1337 ctxt->nbChars++;
1338 ctxt->input->cur++;
1339 }
1340 return(0xA);
1341 }
1342 return((int) *ctxt->input->cur);
1343 }
1344 }
1345 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001346 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001347 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001348 * XML constructs only use < 128 chars
1349 */
1350 *len = 1;
1351 if (*ctxt->input->cur == 0xD) {
1352 if (ctxt->input->cur[1] == 0xA) {
1353 ctxt->nbChars++;
1354 ctxt->input->cur++;
1355 }
1356 return(0xA);
1357 }
1358 return((int) *ctxt->input->cur);
1359encoding_error:
1360 /*
1361 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001362 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001363 * declaration header. Report the error and switch the encoding
1364 * to ISO-Latin-1 (if you don't like this policy, just declare the
1365 * encoding !)
1366 */
1367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1368 ctxt->sax->error(ctxt->userData,
1369 "Input is not proper UTF-8, indicate encoding !\n");
1370 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1371 ctxt->input->cur[0], ctxt->input->cur[1],
1372 ctxt->input->cur[2], ctxt->input->cur[3]);
1373 }
1374 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1375
1376 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1377 *len = 1;
1378 return((int) *ctxt->input->cur);
1379}
1380
1381/**
1382 * xmlStringCurrentChar:
1383 * @ctxt: the XML parser context
1384 * @cur: pointer to the beginning of the char
1385 * @len: pointer to the length of the char read
1386 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001387 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001388 * bytes in the input buffer.
1389 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001390 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001391 */
1392
1393int
1394xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillard61d80a22001-04-27 17:13:01 +00001395 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001396 /*
1397 * We are supposed to handle UTF8, check it's valid
1398 * From rfc2044: encoding of the Unicode values on UTF-8:
1399 *
1400 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1401 * 0000 0000-0000 007F 0xxxxxxx
1402 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1403 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1404 *
1405 * Check for the 0x110000 limit too
1406 */
1407 unsigned char c;
1408 unsigned int val;
1409
1410 c = *cur;
1411 if (c & 0x80) {
1412 if ((cur[1] & 0xc0) != 0x80)
1413 goto encoding_error;
1414 if ((c & 0xe0) == 0xe0) {
1415
1416 if ((cur[2] & 0xc0) != 0x80)
1417 goto encoding_error;
1418 if ((c & 0xf0) == 0xf0) {
1419 if (((c & 0xf8) != 0xf0) ||
1420 ((cur[3] & 0xc0) != 0x80))
1421 goto encoding_error;
1422 /* 4-byte code */
1423 *len = 4;
1424 val = (cur[0] & 0x7) << 18;
1425 val |= (cur[1] & 0x3f) << 12;
1426 val |= (cur[2] & 0x3f) << 6;
1427 val |= cur[3] & 0x3f;
1428 } else {
1429 /* 3-byte code */
1430 *len = 3;
1431 val = (cur[0] & 0xf) << 12;
1432 val |= (cur[1] & 0x3f) << 6;
1433 val |= cur[2] & 0x3f;
1434 }
1435 } else {
1436 /* 2-byte code */
1437 *len = 2;
1438 val = (cur[0] & 0x1f) << 6;
Daniel Veillarde043ee12001-04-16 14:08:07 +00001439 val |= cur[1] & 0x3f;
Owen Taylor3473f882001-02-23 17:55:21 +00001440 }
1441 if (!IS_CHAR(val)) {
1442 if ((ctxt->sax != NULL) &&
1443 (ctxt->sax->error != NULL))
1444 ctxt->sax->error(ctxt->userData,
1445 "Char 0x%X out of allowed range\n", val);
1446 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1447 ctxt->wellFormed = 0;
1448 ctxt->disableSAX = 1;
1449 }
1450 return(val);
1451 } else {
1452 /* 1-byte code */
1453 *len = 1;
1454 return((int) *cur);
1455 }
1456 }
1457 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001458 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001459 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001460 * XML constructs only use < 128 chars
1461 */
1462 *len = 1;
1463 return((int) *cur);
1464encoding_error:
1465 /*
1466 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001467 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001468 * declaration header. Report the error and switch the encoding
1469 * to ISO-Latin-1 (if you don't like this policy, just declare the
1470 * encoding !)
1471 */
1472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1473 ctxt->sax->error(ctxt->userData,
1474 "Input is not proper UTF-8, indicate encoding !\n");
1475 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1476 ctxt->input->cur[0], ctxt->input->cur[1],
1477 ctxt->input->cur[2], ctxt->input->cur[3]);
1478 }
1479 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1480
1481 *len = 1;
1482 return((int) *cur);
1483}
1484
1485/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001486 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001487 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001488 * @val: the char value
1489 *
1490 * append the char value in the array
1491 *
1492 * Returns the number of xmlChar written
1493 */
Owen Taylor3473f882001-02-23 17:55:21 +00001494int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001495xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001496 /*
1497 * We are supposed to handle UTF8, check it's valid
1498 * From rfc2044: encoding of the Unicode values on UTF-8:
1499 *
1500 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1501 * 0000 0000-0000 007F 0xxxxxxx
1502 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1503 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1504 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001505 if (val >= 0x80) {
1506 xmlChar *savedout = out;
1507 int bits;
1508 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1509 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1510 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1511 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001512 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001513 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001514 val);
1515 return(0);
1516 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001517 for ( ; bits >= 0; bits-= 6)
1518 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1519 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 }
1521 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001522 return 1;
1523}
1524
1525/**
1526 * xmlCopyChar:
1527 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001528 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001529 * @val: the char value
1530 *
1531 * append the char value in the array
1532 *
1533 * Returns the number of xmlChar written
1534 */
1535
1536int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001537xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538 /* the len parameter is ignored */
1539 if (val >= 0x80) {
1540 return(xmlCopyCharMultiByte (out, val));
1541 }
1542 *out = (xmlChar) val;
1543 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001544}
1545
1546/************************************************************************
1547 * *
1548 * Commodity functions to switch encodings *
1549 * *
1550 ************************************************************************/
1551
1552/**
1553 * xmlSwitchEncoding:
1554 * @ctxt: the parser context
1555 * @enc: the encoding value (number)
1556 *
1557 * change the input functions when discovering the character encoding
1558 * of a given entity.
1559 *
1560 * Returns 0 in case of success, -1 otherwise
1561 */
1562int
1563xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1564{
1565 xmlCharEncodingHandlerPtr handler;
1566
1567 switch (enc) {
1568 case XML_CHAR_ENCODING_ERROR:
1569 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1572 ctxt->wellFormed = 0;
1573 ctxt->disableSAX = 1;
1574 break;
1575 case XML_CHAR_ENCODING_NONE:
1576 /* let's assume it's UTF-8 without the XML decl */
1577 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1578 return(0);
1579 case XML_CHAR_ENCODING_UTF8:
1580 /* default encoding, no conversion should be needed */
1581 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001582
1583 /*
1584 * Errata on XML-1.0 June 20 2001
1585 * Specific handling of the Byte Order Mark for
1586 * UTF-8
1587 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001588 if ((ctxt->input != NULL) &&
1589 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001590 (ctxt->input->cur[1] == 0xBB) &&
1591 (ctxt->input->cur[2] == 0xBF)) {
1592 ctxt->input->cur += 3;
1593 }
Owen Taylor3473f882001-02-23 17:55:21 +00001594 return(0);
1595 default:
1596 break;
1597 }
1598 handler = xmlGetCharEncodingHandler(enc);
1599 if (handler == NULL) {
1600 /*
1601 * Default handlers.
1602 */
1603 switch (enc) {
1604 case XML_CHAR_ENCODING_ERROR:
1605 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1608 ctxt->wellFormed = 0;
1609 ctxt->disableSAX = 1;
1610 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1611 break;
1612 case XML_CHAR_ENCODING_NONE:
1613 /* let's assume it's UTF-8 without the XML decl */
1614 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1615 return(0);
1616 case XML_CHAR_ENCODING_UTF8:
1617 case XML_CHAR_ENCODING_ASCII:
1618 /* default encoding, no conversion should be needed */
1619 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1620 return(0);
1621 case XML_CHAR_ENCODING_UTF16LE:
1622 break;
1623 case XML_CHAR_ENCODING_UTF16BE:
1624 break;
1625 case XML_CHAR_ENCODING_UCS4LE:
1626 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1628 ctxt->sax->error(ctxt->userData,
1629 "char encoding USC4 little endian not supported\n");
1630 break;
1631 case XML_CHAR_ENCODING_UCS4BE:
1632 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1634 ctxt->sax->error(ctxt->userData,
1635 "char encoding USC4 big endian not supported\n");
1636 break;
1637 case XML_CHAR_ENCODING_EBCDIC:
1638 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1640 ctxt->sax->error(ctxt->userData,
1641 "char encoding EBCDIC not supported\n");
1642 break;
1643 case XML_CHAR_ENCODING_UCS4_2143:
1644 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1646 ctxt->sax->error(ctxt->userData,
1647 "char encoding UCS4 2143 not supported\n");
1648 break;
1649 case XML_CHAR_ENCODING_UCS4_3412:
1650 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1652 ctxt->sax->error(ctxt->userData,
1653 "char encoding UCS4 3412 not supported\n");
1654 break;
1655 case XML_CHAR_ENCODING_UCS2:
1656 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1658 ctxt->sax->error(ctxt->userData,
1659 "char encoding UCS2 not supported\n");
1660 break;
1661 case XML_CHAR_ENCODING_8859_1:
1662 case XML_CHAR_ENCODING_8859_2:
1663 case XML_CHAR_ENCODING_8859_3:
1664 case XML_CHAR_ENCODING_8859_4:
1665 case XML_CHAR_ENCODING_8859_5:
1666 case XML_CHAR_ENCODING_8859_6:
1667 case XML_CHAR_ENCODING_8859_7:
1668 case XML_CHAR_ENCODING_8859_8:
1669 case XML_CHAR_ENCODING_8859_9:
1670 /*
1671 * We used to keep the internal content in the
1672 * document encoding however this turns being unmaintainable
1673 * So xmlGetCharEncodingHandler() will return non-null
1674 * values for this now.
1675 */
1676 if ((ctxt->inputNr == 1) &&
1677 (ctxt->encoding == NULL) &&
1678 (ctxt->input->encoding != NULL)) {
1679 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1680 }
1681 ctxt->charset = enc;
1682 return(0);
1683 case XML_CHAR_ENCODING_2022_JP:
1684 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1686 ctxt->sax->error(ctxt->userData,
1687 "char encoding ISO-2022-JPnot supported\n");
1688 break;
1689 case XML_CHAR_ENCODING_SHIFT_JIS:
1690 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1692 ctxt->sax->error(ctxt->userData,
1693 "char encoding Shift_JIS not supported\n");
1694 break;
1695 case XML_CHAR_ENCODING_EUC_JP:
1696 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698 ctxt->sax->error(ctxt->userData,
1699 "char encoding EUC-JPnot supported\n");
1700 break;
1701 }
1702 }
1703 if (handler == NULL)
1704 return(-1);
1705 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1706 return(xmlSwitchToEncoding(ctxt, handler));
1707}
1708
1709/**
1710 * xmlSwitchToEncoding:
1711 * @ctxt: the parser context
1712 * @handler: the encoding handler
1713 *
1714 * change the input functions when discovering the character encoding
1715 * of a given entity.
1716 *
1717 * Returns 0 in case of success, -1 otherwise
1718 */
1719int
1720xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1721{
1722 int nbchars;
1723
1724 if (handler != NULL) {
1725 if (ctxt->input != NULL) {
1726 if (ctxt->input->buf != NULL) {
1727 if (ctxt->input->buf->encoder != NULL) {
1728 if (ctxt->input->buf->encoder == handler)
1729 return(0);
1730 /*
1731 * Note: this is a bit dangerous, but that's what it
1732 * takes to use nearly compatible signature for different
1733 * encodings.
1734 */
1735 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1736 ctxt->input->buf->encoder = handler;
1737 return(0);
1738 }
1739 ctxt->input->buf->encoder = handler;
1740
1741 /*
1742 * Is there already some content down the pipe to convert ?
1743 */
1744 if ((ctxt->input->buf->buffer != NULL) &&
1745 (ctxt->input->buf->buffer->use > 0)) {
1746 int processed;
1747
1748 /*
1749 * Specific handling of the Byte Order Mark for
1750 * UTF-16
1751 */
1752 if ((handler->name != NULL) &&
1753 (!strcmp(handler->name, "UTF-16LE")) &&
1754 (ctxt->input->cur[0] == 0xFF) &&
1755 (ctxt->input->cur[1] == 0xFE)) {
1756 ctxt->input->cur += 2;
1757 }
1758 if ((handler->name != NULL) &&
1759 (!strcmp(handler->name, "UTF-16BE")) &&
1760 (ctxt->input->cur[0] == 0xFE) &&
1761 (ctxt->input->cur[1] == 0xFF)) {
1762 ctxt->input->cur += 2;
1763 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001764 /*
1765 * Errata on XML-1.0 June 20 2001
1766 * Specific handling of the Byte Order Mark for
1767 * UTF-8
1768 */
1769 if ((handler->name != NULL) &&
1770 (!strcmp(handler->name, "UTF-8")) &&
1771 (ctxt->input->cur[0] == 0xEF) &&
1772 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001773 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001774 ctxt->input->cur += 3;
1775 }
Owen Taylor3473f882001-02-23 17:55:21 +00001776
1777 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001778 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001779 * Move it as the raw buffer and create a new input buffer
1780 */
1781 processed = ctxt->input->cur - ctxt->input->base;
1782 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1783 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1784 ctxt->input->buf->buffer = xmlBufferCreate();
1785
1786 if (ctxt->html) {
1787 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001788 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001789 */
1790 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1791 ctxt->input->buf->buffer,
1792 ctxt->input->buf->raw);
1793 } else {
1794 /*
1795 * convert just enough to get
1796 * '<?xml version="1.0" encoding="xxx"?>'
1797 * parsed with the autodetected encoding
1798 * into the parser reading buffer.
1799 */
1800 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1801 ctxt->input->buf->buffer,
1802 ctxt->input->buf->raw);
1803 }
1804 if (nbchars < 0) {
1805 xmlGenericError(xmlGenericErrorContext,
1806 "xmlSwitchToEncoding: encoder error\n");
1807 return(-1);
1808 }
1809 ctxt->input->base =
1810 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001811 ctxt->input->end =
1812 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001813
1814 }
1815 return(0);
1816 } else {
1817 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1818 /*
1819 * When parsing a static memory array one must know the
1820 * size to be able to convert the buffer.
1821 */
1822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1823 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001824 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001825 return(-1);
1826 } else {
1827 int processed;
1828
1829 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001830 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001831 * Move it as the raw buffer and create a new input buffer
1832 */
1833 processed = ctxt->input->cur - ctxt->input->base;
1834
1835 ctxt->input->buf->raw = xmlBufferCreate();
1836 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1837 ctxt->input->length - processed);
1838 ctxt->input->buf->buffer = xmlBufferCreate();
1839
1840 /*
1841 * convert as much as possible of the raw input
1842 * to the parser reading buffer.
1843 */
1844 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1845 ctxt->input->buf->buffer,
1846 ctxt->input->buf->raw);
1847 if (nbchars < 0) {
1848 xmlGenericError(xmlGenericErrorContext,
1849 "xmlSwitchToEncoding: encoder error\n");
1850 return(-1);
1851 }
1852
1853 /*
1854 * Conversion succeeded, get rid of the old buffer
1855 */
1856 if ((ctxt->input->free != NULL) &&
1857 (ctxt->input->base != NULL))
1858 ctxt->input->free((xmlChar *) ctxt->input->base);
1859 ctxt->input->base =
1860 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001861 ctxt->input->end =
1862 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001863 }
1864 }
1865 } else {
1866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1867 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001868 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001869 return(-1);
1870 }
1871 /*
1872 * The parsing is now done in UTF8 natively
1873 */
1874 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1875 } else
1876 return(-1);
1877 return(0);
1878
1879}
1880
1881/************************************************************************
1882 * *
1883 * Commodity functions to handle entities processing *
1884 * *
1885 ************************************************************************/
1886
1887/**
1888 * xmlFreeInputStream:
1889 * @input: an xmlParserInputPtr
1890 *
1891 * Free up an input stream.
1892 */
1893void
1894xmlFreeInputStream(xmlParserInputPtr input) {
1895 if (input == NULL) return;
1896
1897 if (input->filename != NULL) xmlFree((char *) input->filename);
1898 if (input->directory != NULL) xmlFree((char *) input->directory);
1899 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1900 if (input->version != NULL) xmlFree((char *) input->version);
1901 if ((input->free != NULL) && (input->base != NULL))
1902 input->free((xmlChar *) input->base);
1903 if (input->buf != NULL)
1904 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001905 xmlFree(input);
1906}
1907
1908/**
1909 * xmlNewInputStream:
1910 * @ctxt: an XML parser context
1911 *
1912 * Create a new input stream structure
1913 * Returns the new input stream or NULL
1914 */
1915xmlParserInputPtr
1916xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1917 xmlParserInputPtr input;
1918
1919 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1920 if (input == NULL) {
1921 if (ctxt != NULL) {
1922 ctxt->errNo = XML_ERR_NO_MEMORY;
1923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1924 ctxt->sax->error(ctxt->userData,
1925 "malloc: couldn't allocate a new input stream\n");
1926 ctxt->errNo = XML_ERR_NO_MEMORY;
1927 }
1928 return(NULL);
1929 }
1930 memset(input, 0, sizeof(xmlParserInput));
1931 input->line = 1;
1932 input->col = 1;
1933 input->standalone = -1;
1934 return(input);
1935}
1936
1937/**
1938 * xmlNewIOInputStream:
1939 * @ctxt: an XML parser context
1940 * @input: an I/O Input
1941 * @enc: the charset encoding if known
1942 *
1943 * Create a new input stream structure encapsulating the @input into
1944 * a stream suitable for the parser.
1945 *
1946 * Returns the new input stream or NULL
1947 */
1948xmlParserInputPtr
1949xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1950 xmlCharEncoding enc) {
1951 xmlParserInputPtr inputStream;
1952
1953 if (xmlParserDebugEntities)
1954 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1955 inputStream = xmlNewInputStream(ctxt);
1956 if (inputStream == NULL) {
1957 return(NULL);
1958 }
1959 inputStream->filename = NULL;
1960 inputStream->buf = input;
1961 inputStream->base = inputStream->buf->buffer->content;
1962 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001963 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001964 if (enc != XML_CHAR_ENCODING_NONE) {
1965 xmlSwitchEncoding(ctxt, enc);
1966 }
1967
1968 return(inputStream);
1969}
1970
1971/**
1972 * xmlNewEntityInputStream:
1973 * @ctxt: an XML parser context
1974 * @entity: an Entity pointer
1975 *
1976 * Create a new input stream based on an xmlEntityPtr
1977 *
1978 * Returns the new input stream or NULL
1979 */
1980xmlParserInputPtr
1981xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1982 xmlParserInputPtr input;
1983
1984 if (entity == NULL) {
1985 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "internal: xmlNewEntityInputStream entity = NULL\n");
1989 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1990 return(NULL);
1991 }
1992 if (xmlParserDebugEntities)
1993 xmlGenericError(xmlGenericErrorContext,
1994 "new input from entity: %s\n", entity->name);
1995 if (entity->content == NULL) {
1996 switch (entity->etype) {
1997 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1998 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2000 ctxt->sax->error(ctxt->userData,
2001 "xmlNewEntityInputStream unparsed entity !\n");
2002 break;
2003 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2004 case XML_EXTERNAL_PARAMETER_ENTITY:
2005 return(xmlLoadExternalEntity((char *) entity->URI,
2006 (char *) entity->ExternalID, ctxt));
2007 case XML_INTERNAL_GENERAL_ENTITY:
2008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2009 ctxt->sax->error(ctxt->userData,
2010 "Internal entity %s without content !\n", entity->name);
2011 break;
2012 case XML_INTERNAL_PARAMETER_ENTITY:
2013 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2015 ctxt->sax->error(ctxt->userData,
2016 "Internal parameter entity %s without content !\n", entity->name);
2017 break;
2018 case XML_INTERNAL_PREDEFINED_ENTITY:
2019 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData,
2022 "Predefined entity %s without content !\n", entity->name);
2023 break;
2024 }
2025 return(NULL);
2026 }
2027 input = xmlNewInputStream(ctxt);
2028 if (input == NULL) {
2029 return(NULL);
2030 }
2031 input->filename = (char *) entity->URI;
2032 input->base = entity->content;
2033 input->cur = entity->content;
2034 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002036 return(input);
2037}
2038
2039/**
2040 * xmlNewStringInputStream:
2041 * @ctxt: an XML parser context
2042 * @buffer: an memory buffer
2043 *
2044 * Create a new input stream based on a memory buffer.
2045 * Returns the new input stream
2046 */
2047xmlParserInputPtr
2048xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2049 xmlParserInputPtr input;
2050
2051 if (buffer == NULL) {
2052 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2054 ctxt->sax->error(ctxt->userData,
2055 "internal: xmlNewStringInputStream string = NULL\n");
2056 return(NULL);
2057 }
2058 if (xmlParserDebugEntities)
2059 xmlGenericError(xmlGenericErrorContext,
2060 "new fixed input: %.30s\n", buffer);
2061 input = xmlNewInputStream(ctxt);
2062 if (input == NULL) {
2063 return(NULL);
2064 }
2065 input->base = buffer;
2066 input->cur = buffer;
2067 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002068 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002069 return(input);
2070}
2071
2072/**
2073 * xmlNewInputFromFile:
2074 * @ctxt: an XML parser context
2075 * @filename: the filename to use as entity
2076 *
2077 * Create a new input stream based on a file.
2078 *
2079 * Returns the new input stream or NULL in case of error
2080 */
2081xmlParserInputPtr
2082xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2083 xmlParserInputBufferPtr buf;
2084 xmlParserInputPtr inputStream;
2085 char *directory = NULL;
2086 xmlChar *URI = NULL;
2087
2088 if (xmlParserDebugEntities)
2089 xmlGenericError(xmlGenericErrorContext,
2090 "new input from file: %s\n", filename);
2091 if (ctxt == NULL) return(NULL);
2092 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2093 if (buf == NULL)
2094 return(NULL);
2095
2096 URI = xmlStrdup((xmlChar *) filename);
2097 directory = xmlParserGetDirectory((const char *) URI);
2098
2099 inputStream = xmlNewInputStream(ctxt);
2100 if (inputStream == NULL) {
2101 if (directory != NULL) xmlFree((char *) directory);
2102 if (URI != NULL) xmlFree((char *) URI);
2103 return(NULL);
2104 }
2105
2106 inputStream->filename = (const char *) URI;
2107 inputStream->directory = directory;
2108 inputStream->buf = buf;
2109
2110 inputStream->base = inputStream->buf->buffer->content;
2111 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002112 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002113 if ((ctxt->directory == NULL) && (directory != NULL))
2114 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2115 return(inputStream);
2116}
2117
2118/************************************************************************
2119 * *
2120 * Commodity functions to handle parser contexts *
2121 * *
2122 ************************************************************************/
2123
2124/**
2125 * xmlInitParserCtxt:
2126 * @ctxt: an XML parser context
2127 *
2128 * Initialize a parser context
2129 */
2130
2131void
2132xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2133{
2134 xmlSAXHandler *sax;
2135
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002136 if(ctxt==NULL) {
2137 xmlGenericError(xmlGenericErrorContext,
2138 "xmlInitParserCtxt: NULL context given\n");
2139 return;
2140 }
2141
Owen Taylor3473f882001-02-23 17:55:21 +00002142 xmlDefaultSAXHandlerInit();
2143
2144 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2145 if (sax == NULL) {
2146 xmlGenericError(xmlGenericErrorContext,
2147 "xmlInitParserCtxt: out of memory\n");
2148 }
2149 else
2150 memset(sax, 0, sizeof(xmlSAXHandler));
2151
2152 /* Allocate the Input stack */
2153 ctxt->inputTab = (xmlParserInputPtr *)
2154 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2155 if (ctxt->inputTab == NULL) {
2156 xmlGenericError(xmlGenericErrorContext,
2157 "xmlInitParserCtxt: out of memory\n");
2158 ctxt->inputNr = 0;
2159 ctxt->inputMax = 0;
2160 ctxt->input = NULL;
2161 return;
2162 }
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 5;
2165 ctxt->input = NULL;
2166
2167 ctxt->version = NULL;
2168 ctxt->encoding = NULL;
2169 ctxt->standalone = -1;
2170 ctxt->hasExternalSubset = 0;
2171 ctxt->hasPErefs = 0;
2172 ctxt->html = 0;
2173 ctxt->external = 0;
2174 ctxt->instate = XML_PARSER_START;
2175 ctxt->token = 0;
2176 ctxt->directory = NULL;
2177
2178 /* Allocate the Node stack */
2179 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2180 if (ctxt->nodeTab == NULL) {
2181 xmlGenericError(xmlGenericErrorContext,
2182 "xmlInitParserCtxt: out of memory\n");
2183 ctxt->nodeNr = 0;
2184 ctxt->nodeMax = 0;
2185 ctxt->node = NULL;
2186 ctxt->inputNr = 0;
2187 ctxt->inputMax = 0;
2188 ctxt->input = NULL;
2189 return;
2190 }
2191 ctxt->nodeNr = 0;
2192 ctxt->nodeMax = 10;
2193 ctxt->node = NULL;
2194
2195 /* Allocate the Name stack */
2196 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2197 if (ctxt->nameTab == NULL) {
2198 xmlGenericError(xmlGenericErrorContext,
2199 "xmlInitParserCtxt: out of memory\n");
2200 ctxt->nodeNr = 0;
2201 ctxt->nodeMax = 0;
2202 ctxt->node = NULL;
2203 ctxt->inputNr = 0;
2204 ctxt->inputMax = 0;
2205 ctxt->input = NULL;
2206 ctxt->nameNr = 0;
2207 ctxt->nameMax = 0;
2208 ctxt->name = NULL;
2209 return;
2210 }
2211 ctxt->nameNr = 0;
2212 ctxt->nameMax = 10;
2213 ctxt->name = NULL;
2214
2215 /* Allocate the space stack */
2216 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2217 if (ctxt->spaceTab == NULL) {
2218 xmlGenericError(xmlGenericErrorContext,
2219 "xmlInitParserCtxt: out of memory\n");
2220 ctxt->nodeNr = 0;
2221 ctxt->nodeMax = 0;
2222 ctxt->node = NULL;
2223 ctxt->inputNr = 0;
2224 ctxt->inputMax = 0;
2225 ctxt->input = NULL;
2226 ctxt->nameNr = 0;
2227 ctxt->nameMax = 0;
2228 ctxt->name = NULL;
2229 ctxt->spaceNr = 0;
2230 ctxt->spaceMax = 0;
2231 ctxt->space = NULL;
2232 return;
2233 }
2234 ctxt->spaceNr = 1;
2235 ctxt->spaceMax = 10;
2236 ctxt->spaceTab[0] = -1;
2237 ctxt->space = &ctxt->spaceTab[0];
2238
Daniel Veillard14be0a12001-03-03 18:50:55 +00002239 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002240 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002241
Owen Taylor3473f882001-02-23 17:55:21 +00002242 ctxt->userData = ctxt;
2243 ctxt->myDoc = NULL;
2244 ctxt->wellFormed = 1;
2245 ctxt->valid = 1;
2246 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2247 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2248 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002249 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002250 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002251 if (ctxt->keepBlanks == 0)
2252 sax->ignorableWhitespace = ignorableWhitespace;
2253
Owen Taylor3473f882001-02-23 17:55:21 +00002254 ctxt->vctxt.userData = ctxt;
2255 if (ctxt->validate) {
2256 ctxt->vctxt.error = xmlParserValidityError;
2257 if (xmlGetWarningsDefaultValue == 0)
2258 ctxt->vctxt.warning = NULL;
2259 else
2260 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002261 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002262 } else {
2263 ctxt->vctxt.error = NULL;
2264 ctxt->vctxt.warning = NULL;
2265 }
2266 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2267 ctxt->record_info = 0;
2268 ctxt->nbChars = 0;
2269 ctxt->checkIndex = 0;
2270 ctxt->inSubset = 0;
2271 ctxt->errNo = XML_ERR_OK;
2272 ctxt->depth = 0;
2273 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002274 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002275 xmlInitNodeInfoSeq(&ctxt->node_seq);
2276}
2277
2278/**
2279 * xmlFreeParserCtxt:
2280 * @ctxt: an XML parser context
2281 *
2282 * Free all the memory used by a parser context. However the parsed
2283 * document in ctxt->myDoc is not freed.
2284 */
2285
2286void
2287xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2288{
2289 xmlParserInputPtr input;
2290 xmlChar *oldname;
2291
2292 if (ctxt == NULL) return;
2293
2294 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2295 xmlFreeInputStream(input);
2296 }
2297 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2298 xmlFree(oldname);
2299 }
2300 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2301 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2302 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2303 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2304 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2305 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2306 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2307 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2308 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002309 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2310 xmlFree(ctxt->sax);
2311 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002312 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002313#ifdef LIBXML_CATALOG_ENABLED
2314 if (ctxt->catalogs != NULL)
2315 xmlCatalogFreeLocal(ctxt->catalogs);
2316#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002317 xmlFree(ctxt);
2318}
2319
2320/**
2321 * xmlNewParserCtxt:
2322 *
2323 * Allocate and initialize a new parser context.
2324 *
2325 * Returns the xmlParserCtxtPtr or NULL
2326 */
2327
2328xmlParserCtxtPtr
2329xmlNewParserCtxt()
2330{
2331 xmlParserCtxtPtr ctxt;
2332
2333 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2334 if (ctxt == NULL) {
2335 xmlGenericError(xmlGenericErrorContext,
2336 "xmlNewParserCtxt : cannot allocate context\n");
2337 perror("malloc");
2338 return(NULL);
2339 }
2340 memset(ctxt, 0, sizeof(xmlParserCtxt));
2341 xmlInitParserCtxt(ctxt);
2342 return(ctxt);
2343}
2344
2345/************************************************************************
2346 * *
2347 * Handling of node informations *
2348 * *
2349 ************************************************************************/
2350
2351/**
2352 * xmlClearParserCtxt:
2353 * @ctxt: an XML parser context
2354 *
2355 * Clear (release owned resources) and reinitialize a parser context
2356 */
2357
2358void
2359xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2360{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002361 if (ctxt==NULL)
2362 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002363 xmlClearNodeInfoSeq(&ctxt->node_seq);
2364 xmlInitParserCtxt(ctxt);
2365}
2366
2367/**
2368 * xmlParserFindNodeInfo:
2369 * @ctxt: an XML parser context
2370 * @node: an XML node within the tree
2371 *
2372 * Find the parser node info struct for a given node
2373 *
2374 * Returns an xmlParserNodeInfo block pointer or NULL
2375 */
2376const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2377 const xmlNode* node)
2378{
2379 unsigned long pos;
2380
2381 /* Find position where node should be at */
2382 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002383 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002384 return &ctx->node_seq.buffer[pos];
2385 else
2386 return NULL;
2387}
2388
2389
2390/**
2391 * xmlInitNodeInfoSeq:
2392 * @seq: a node info sequence pointer
2393 *
2394 * -- Initialize (set to initial state) node info sequence
2395 */
2396void
2397xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2398{
2399 seq->length = 0;
2400 seq->maximum = 0;
2401 seq->buffer = NULL;
2402}
2403
2404/**
2405 * xmlClearNodeInfoSeq:
2406 * @seq: a node info sequence pointer
2407 *
2408 * -- Clear (release memory and reinitialize) node
2409 * info sequence
2410 */
2411void
2412xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2413{
2414 if ( seq->buffer != NULL )
2415 xmlFree(seq->buffer);
2416 xmlInitNodeInfoSeq(seq);
2417}
2418
2419
2420/**
2421 * xmlParserFindNodeInfoIndex:
2422 * @seq: a node info sequence pointer
2423 * @node: an XML node pointer
2424 *
2425 *
2426 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2427 * the given node is or should be at in a sorted sequence
2428 *
2429 * Returns a long indicating the position of the record
2430 */
2431unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2432 const xmlNode* node)
2433{
2434 unsigned long upper, lower, middle;
2435 int found = 0;
2436
2437 /* Do a binary search for the key */
2438 lower = 1;
2439 upper = seq->length;
2440 middle = 0;
2441 while ( lower <= upper && !found) {
2442 middle = lower + (upper - lower) / 2;
2443 if ( node == seq->buffer[middle - 1].node )
2444 found = 1;
2445 else if ( node < seq->buffer[middle - 1].node )
2446 upper = middle - 1;
2447 else
2448 lower = middle + 1;
2449 }
2450
2451 /* Return position */
2452 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2453 return middle;
2454 else
2455 return middle - 1;
2456}
2457
2458
2459/**
2460 * xmlParserAddNodeInfo:
2461 * @ctxt: an XML parser context
2462 * @info: a node info sequence pointer
2463 *
2464 * Insert node info record into the sorted sequence
2465 */
2466void
2467xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2468 const xmlParserNodeInfo* info)
2469{
2470 unsigned long pos;
2471 static unsigned int block_size = 5;
2472
2473 /* Find pos and check to see if node is already in the sequence */
2474 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2475 if ( pos < ctxt->node_seq.length
2476 && ctxt->node_seq.buffer[pos].node == info->node ) {
2477 ctxt->node_seq.buffer[pos] = *info;
2478 }
2479
2480 /* Otherwise, we need to add new node to buffer */
2481 else {
2482 /* Expand buffer by 5 if needed */
2483 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2484 xmlParserNodeInfo* tmp_buffer;
2485 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2486 *(ctxt->node_seq.maximum + block_size));
2487
2488 if ( ctxt->node_seq.buffer == NULL )
2489 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2490 else
2491 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2492
2493 if ( tmp_buffer == NULL ) {
2494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2495 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2496 ctxt->errNo = XML_ERR_NO_MEMORY;
2497 return;
2498 }
2499 ctxt->node_seq.buffer = tmp_buffer;
2500 ctxt->node_seq.maximum += block_size;
2501 }
2502
2503 /* If position is not at end, move elements out of the way */
2504 if ( pos != ctxt->node_seq.length ) {
2505 unsigned long i;
2506
2507 for ( i = ctxt->node_seq.length; i > pos; i-- )
2508 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2509 }
2510
2511 /* Copy element and increase length */
2512 ctxt->node_seq.buffer[pos] = *info;
2513 ctxt->node_seq.length++;
2514 }
2515}
2516
2517/************************************************************************
2518 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002519 * Defaults settings *
2520 * *
2521 ************************************************************************/
2522/**
2523 * xmlPedanticParserDefault:
2524 * @val: int 0 or 1
2525 *
2526 * Set and return the previous value for enabling pedantic warnings.
2527 *
2528 * Returns the last value for 0 for no substitution, 1 for substitution.
2529 */
2530
2531int
2532xmlPedanticParserDefault(int val) {
2533 int old = xmlPedanticParserDefaultValue;
2534
2535 xmlPedanticParserDefaultValue = val;
2536 return(old);
2537}
2538
2539/**
2540 * xmlLineNumbersDefault:
2541 * @val: int 0 or 1
2542 *
2543 * Set and return the previous value for enabling line numbers in elements
2544 * contents. This may break on old application and is turned off by default.
2545 *
2546 * Returns the last value for 0 for no substitution, 1 for substitution.
2547 */
2548
2549int
2550xmlLineNumbersDefault(int val) {
2551 int old = xmlLineNumbersDefaultValue;
2552
2553 xmlLineNumbersDefaultValue = val;
2554 return(old);
2555}
2556
2557/**
2558 * xmlSubstituteEntitiesDefault:
2559 * @val: int 0 or 1
2560 *
2561 * Set and return the previous value for default entity support.
2562 * Initially the parser always keep entity references instead of substituting
2563 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002564 * default parser behavior
2565 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002566 * file basis.
2567 *
2568 * Returns the last value for 0 for no substitution, 1 for substitution.
2569 */
2570
2571int
2572xmlSubstituteEntitiesDefault(int val) {
2573 int old = xmlSubstituteEntitiesDefaultValue;
2574
2575 xmlSubstituteEntitiesDefaultValue = val;
2576 return(old);
2577}
2578
2579/**
2580 * xmlKeepBlanksDefault:
2581 * @val: int 0 or 1
2582 *
2583 * Set and return the previous value for default blanks text nodes support.
2584 * The 1.x version of the parser used an heuristic to try to detect
2585 * ignorable white spaces. As a result the SAX callback was generating
2586 * ignorableWhitespace() callbacks instead of characters() one, and when
2587 * using the DOM output text nodes containing those blanks were not generated.
2588 * The 2.x and later version will switch to the XML standard way and
2589 * ignorableWhitespace() are only generated when running the parser in
2590 * validating mode and when the current element doesn't allow CDATA or
2591 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002592 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002593 * on 1.X libs and to switch back to the old mode for compatibility when
2594 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2595 * by using xmlIsBlankNode() commodity function to detect the "empty"
2596 * nodes generated.
2597 * This value also affect autogeneration of indentation when saving code
2598 * if blanks sections are kept, indentation is not generated.
2599 *
2600 * Returns the last value for 0 for no substitution, 1 for substitution.
2601 */
2602
2603int
2604xmlKeepBlanksDefault(int val) {
2605 int old = xmlKeepBlanksDefaultValue;
2606
2607 xmlKeepBlanksDefaultValue = val;
2608 xmlIndentTreeOutput = !val;
2609 return(old);
2610}
2611
2612/************************************************************************
2613 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002614 * Deprecated functions kept for compatibility *
2615 * *
2616 ************************************************************************/
2617
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002618/**
2619 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002620 * @lang: pointer to the string value
2621 *
2622 * Checks that the value conforms to the LanguageID production:
2623 *
2624 * NOTE: this is somewhat deprecated, those productions were removed from
2625 * the XML Second edition.
2626 *
2627 * [33] LanguageID ::= Langcode ('-' Subcode)*
2628 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2629 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2630 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2631 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2632 * [38] Subcode ::= ([a-z] | [A-Z])+
2633 *
2634 * Returns 1 if correct 0 otherwise
2635 **/
2636int
2637xmlCheckLanguageID(const xmlChar *lang) {
2638 const xmlChar *cur = lang;
2639
2640 if (cur == NULL)
2641 return(0);
2642 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2643 ((cur[0] == 'I') && (cur[1] == '-'))) {
2644 /*
2645 * IANA code
2646 */
2647 cur += 2;
2648 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2649 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2650 cur++;
2651 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2652 ((cur[0] == 'X') && (cur[1] == '-'))) {
2653 /*
2654 * User code
2655 */
2656 cur += 2;
2657 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2658 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2659 cur++;
2660 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2661 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2662 /*
2663 * ISO639
2664 */
2665 cur++;
2666 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2667 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2668 cur++;
2669 else
2670 return(0);
2671 } else
2672 return(0);
2673 while (cur[0] != 0) { /* non input consuming */
2674 if (cur[0] != '-')
2675 return(0);
2676 cur++;
2677 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2678 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2679 cur++;
2680 else
2681 return(0);
2682 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2683 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2684 cur++;
2685 }
2686 return(1);
2687}
2688
2689/**
2690 * xmlDecodeEntities:
2691 * @ctxt: the parser context
2692 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2693 * @len: the len to decode (in bytes !), -1 for no size limit
2694 * @end: an end marker xmlChar, 0 if none
2695 * @end2: an end marker xmlChar, 0 if none
2696 * @end3: an end marker xmlChar, 0 if none
2697 *
2698 * This function is deprecated, we now always process entities content
2699 * through xmlStringDecodeEntities
2700 *
2701 * TODO: remove it in next major release.
2702 *
2703 * [67] Reference ::= EntityRef | CharRef
2704 *
2705 * [69] PEReference ::= '%' Name ';'
2706 *
2707 * Returns A newly allocated string with the substitution done. The caller
2708 * must deallocate it !
2709 */
2710xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002711xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2712 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002713#if 0
2714 xmlChar *buffer = NULL;
2715 unsigned int buffer_size = 0;
2716 unsigned int nbchars = 0;
2717
2718 xmlChar *current = NULL;
2719 xmlEntityPtr ent;
2720 unsigned int max = (unsigned int) len;
2721 int c,l;
2722#endif
2723
2724 static int deprecated = 0;
2725 if (!deprecated) {
2726 xmlGenericError(xmlGenericErrorContext,
2727 "xmlDecodeEntities() deprecated function reached\n");
2728 deprecated = 1;
2729 }
2730
2731#if 0
2732 if (ctxt->depth > 40) {
2733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2734 ctxt->sax->error(ctxt->userData,
2735 "Detected entity reference loop\n");
2736 ctxt->wellFormed = 0;
2737 ctxt->disableSAX = 1;
2738 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2739 return(NULL);
2740 }
2741
2742 /*
2743 * allocate a translation buffer.
2744 */
2745 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2746 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2747 if (buffer == NULL) {
2748 perror("xmlDecodeEntities: malloc failed");
2749 return(NULL);
2750 }
2751
2752 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002753 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002754 */
2755 GROW;
2756 c = CUR_CHAR(l);
2757 while ((nbchars < max) && (c != end) && /* NOTUSED */
2758 (c != end2) && (c != end3)) {
2759 GROW;
2760 if (c == 0) break;
2761 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2762 int val = xmlParseCharRef(ctxt);
2763 COPY_BUF(0,buffer,nbchars,val);
2764 NEXTL(l);
2765 } else if ((c == '&') && (ctxt->token != '&') &&
2766 (what & XML_SUBSTITUTE_REF)) {
2767 if (xmlParserDebugEntities)
2768 xmlGenericError(xmlGenericErrorContext,
2769 "decoding Entity Reference\n");
2770 ent = xmlParseEntityRef(ctxt);
2771 if ((ent != NULL) &&
2772 (ctxt->replaceEntities != 0)) {
2773 current = ent->content;
2774 while (*current != 0) { /* non input consuming loop */
2775 buffer[nbchars++] = *current++;
2776 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2777 growBuffer(buffer);
2778 }
2779 }
2780 } else if (ent != NULL) {
2781 const xmlChar *cur = ent->name;
2782
2783 buffer[nbchars++] = '&';
2784 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2785 growBuffer(buffer);
2786 }
2787 while (*cur != 0) { /* non input consuming loop */
2788 buffer[nbchars++] = *cur++;
2789 }
2790 buffer[nbchars++] = ';';
2791 }
2792 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2793 /*
2794 * a PEReference induce to switch the entity flow,
2795 * we break here to flush the current set of chars
2796 * parsed if any. We will be called back later.
2797 */
2798 if (xmlParserDebugEntities)
2799 xmlGenericError(xmlGenericErrorContext,
2800 "decoding PE Reference\n");
2801 if (nbchars != 0) break;
2802
2803 xmlParsePEReference(ctxt);
2804
2805 /*
2806 * Pop-up of finished entities.
2807 */
2808 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2809 xmlPopInput(ctxt);
2810
2811 break;
2812 } else {
2813 COPY_BUF(l,buffer,nbchars,c);
2814 NEXTL(l);
2815 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2816 growBuffer(buffer);
2817 }
2818 }
2819 c = CUR_CHAR(l);
2820 }
2821 buffer[nbchars++] = 0;
2822 return(buffer);
2823#endif
2824 return(NULL);
2825}
2826
2827/**
2828 * xmlNamespaceParseNCName:
2829 * @ctxt: an XML parser context
2830 *
2831 * parse an XML namespace name.
2832 *
2833 * TODO: this seems not in use anymore, the namespace handling is done on
2834 * top of the SAX interfaces, i.e. not on raw input.
2835 *
2836 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2837 *
2838 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2839 * CombiningChar | Extender
2840 *
2841 * Returns the namespace name or NULL
2842 */
2843
2844xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002845xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002846#if 0
2847 xmlChar buf[XML_MAX_NAMELEN + 5];
2848 int len = 0, l;
2849 int cur = CUR_CHAR(l);
2850#endif
2851
2852 static int deprecated = 0;
2853 if (!deprecated) {
2854 xmlGenericError(xmlGenericErrorContext,
2855 "xmlNamespaceParseNCName() deprecated function reached\n");
2856 deprecated = 1;
2857 }
2858
2859#if 0
2860 /* load first the value of the char !!! */
2861 GROW;
2862 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2863
2864xmlGenericError(xmlGenericErrorContext,
2865 "xmlNamespaceParseNCName: reached loop 3\n");
2866 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2867 (cur == '.') || (cur == '-') ||
2868 (cur == '_') ||
2869 (IS_COMBINING(cur)) ||
2870 (IS_EXTENDER(cur))) {
2871 COPY_BUF(l,buf,len,cur);
2872 NEXTL(l);
2873 cur = CUR_CHAR(l);
2874 if (len >= XML_MAX_NAMELEN) {
2875 xmlGenericError(xmlGenericErrorContext,
2876 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2877 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2878 (cur == '.') || (cur == '-') ||
2879 (cur == '_') ||
2880 (IS_COMBINING(cur)) ||
2881 (IS_EXTENDER(cur))) {
2882 NEXTL(l);
2883 cur = CUR_CHAR(l);
2884 }
2885 break;
2886 }
2887 }
2888 return(xmlStrndup(buf, len));
2889#endif
2890 return(NULL);
2891}
2892
2893/**
2894 * xmlNamespaceParseQName:
2895 * @ctxt: an XML parser context
2896 * @prefix: a xmlChar **
2897 *
2898 * TODO: this seems not in use anymore, the namespace handling is done on
2899 * top of the SAX interfaces, i.e. not on raw input.
2900 *
2901 * parse an XML qualified name
2902 *
2903 * [NS 5] QName ::= (Prefix ':')? LocalPart
2904 *
2905 * [NS 6] Prefix ::= NCName
2906 *
2907 * [NS 7] LocalPart ::= NCName
2908 *
2909 * Returns the local part, and prefix is updated
2910 * to get the Prefix if any.
2911 */
2912
2913xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002914xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002915
2916 static int deprecated = 0;
2917 if (!deprecated) {
2918 xmlGenericError(xmlGenericErrorContext,
2919 "xmlNamespaceParseQName() deprecated function reached\n");
2920 deprecated = 1;
2921 }
2922
2923#if 0
2924 xmlChar *ret = NULL;
2925
2926 *prefix = NULL;
2927 ret = xmlNamespaceParseNCName(ctxt);
2928 if (RAW == ':') {
2929 *prefix = ret;
2930 NEXT;
2931 ret = xmlNamespaceParseNCName(ctxt);
2932 }
2933
2934 return(ret);
2935#endif
2936 return(NULL);
2937}
2938
2939/**
2940 * xmlNamespaceParseNSDef:
2941 * @ctxt: an XML parser context
2942 *
2943 * parse a namespace prefix declaration
2944 *
2945 * TODO: this seems not in use anymore, the namespace handling is done on
2946 * top of the SAX interfaces, i.e. not on raw input.
2947 *
2948 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2949 *
2950 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2951 *
2952 * Returns the namespace name
2953 */
2954
2955xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002956xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002957 static int deprecated = 0;
2958 if (!deprecated) {
2959 xmlGenericError(xmlGenericErrorContext,
2960 "xmlNamespaceParseNSDef() deprecated function reached\n");
2961 deprecated = 1;
2962 }
2963 return(NULL);
2964#if 0
2965 xmlChar *name = NULL;
2966
2967 if ((RAW == 'x') && (NXT(1) == 'm') &&
2968 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2969 (NXT(4) == 's')) {
2970 SKIP(5);
2971 if (RAW == ':') {
2972 NEXT;
2973 name = xmlNamespaceParseNCName(ctxt);
2974 }
2975 }
2976 return(name);
2977#endif
2978}
2979
2980/**
2981 * xmlParseQuotedString:
2982 * @ctxt: an XML parser context
2983 *
2984 * Parse and return a string between quotes or doublequotes
2985 *
2986 * TODO: Deprecated, to be removed at next drop of binary compatibility
2987 *
2988 * Returns the string parser or NULL.
2989 */
2990xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002991xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002992 static int deprecated = 0;
2993 if (!deprecated) {
2994 xmlGenericError(xmlGenericErrorContext,
2995 "xmlParseQuotedString() deprecated function reached\n");
2996 deprecated = 1;
2997 }
2998 return(NULL);
2999
3000#if 0
3001 xmlChar *buf = NULL;
3002 int len = 0,l;
3003 int size = XML_PARSER_BUFFER_SIZE;
3004 int c;
3005
3006 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3007 if (buf == NULL) {
3008 xmlGenericError(xmlGenericErrorContext,
3009 "malloc of %d byte failed\n", size);
3010 return(NULL);
3011 }
3012xmlGenericError(xmlGenericErrorContext,
3013 "xmlParseQuotedString: reached loop 4\n");
3014 if (RAW == '"') {
3015 NEXT;
3016 c = CUR_CHAR(l);
3017 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3018 if (len + 5 >= size) {
3019 size *= 2;
3020 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3021 if (buf == NULL) {
3022 xmlGenericError(xmlGenericErrorContext,
3023 "realloc of %d byte failed\n", size);
3024 return(NULL);
3025 }
3026 }
3027 COPY_BUF(l,buf,len,c);
3028 NEXTL(l);
3029 c = CUR_CHAR(l);
3030 }
3031 if (c != '"') {
3032 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "String not closed \"%.50s\"\n", buf);
3036 ctxt->wellFormed = 0;
3037 ctxt->disableSAX = 1;
3038 } else {
3039 NEXT;
3040 }
3041 } else if (RAW == '\''){
3042 NEXT;
3043 c = CUR;
3044 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3045 if (len + 1 >= size) {
3046 size *= 2;
3047 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3048 if (buf == NULL) {
3049 xmlGenericError(xmlGenericErrorContext,
3050 "realloc of %d byte failed\n", size);
3051 return(NULL);
3052 }
3053 }
3054 buf[len++] = c;
3055 NEXT;
3056 c = CUR;
3057 }
3058 if (RAW != '\'') {
3059 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3061 ctxt->sax->error(ctxt->userData,
3062 "String not closed \"%.50s\"\n", buf);
3063 ctxt->wellFormed = 0;
3064 ctxt->disableSAX = 1;
3065 } else {
3066 NEXT;
3067 }
3068 }
3069 return(buf);
3070#endif
3071}
3072
3073/**
3074 * xmlParseNamespace:
3075 * @ctxt: an XML parser context
3076 *
3077 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3078 *
3079 * This is what the older xml-name Working Draft specified, a bunch of
3080 * other stuff may still rely on it, so support is still here as
3081 * if it was declared on the root of the Tree:-(
3082 *
3083 * TODO: remove from library
3084 *
3085 * To be removed at next drop of binary compatibility
3086 */
3087
3088void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003089xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003090 static int deprecated = 0;
3091 if (!deprecated) {
3092 xmlGenericError(xmlGenericErrorContext,
3093 "xmlParseNamespace() deprecated function reached\n");
3094 deprecated = 1;
3095 }
3096
3097#if 0
3098 xmlChar *href = NULL;
3099 xmlChar *prefix = NULL;
3100 int garbage = 0;
3101
3102 /*
3103 * We just skipped "namespace" or "xml:namespace"
3104 */
3105 SKIP_BLANKS;
3106
3107xmlGenericError(xmlGenericErrorContext,
3108 "xmlParseNamespace: reached loop 5\n");
3109 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3110 /*
3111 * We can have "ns" or "prefix" attributes
3112 * Old encoding as 'href' or 'AS' attributes is still supported
3113 */
3114 if ((RAW == 'n') && (NXT(1) == 's')) {
3115 garbage = 0;
3116 SKIP(2);
3117 SKIP_BLANKS;
3118
3119 if (RAW != '=') continue;
3120 NEXT;
3121 SKIP_BLANKS;
3122
3123 href = xmlParseQuotedString(ctxt);
3124 SKIP_BLANKS;
3125 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3126 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3127 garbage = 0;
3128 SKIP(4);
3129 SKIP_BLANKS;
3130
3131 if (RAW != '=') continue;
3132 NEXT;
3133 SKIP_BLANKS;
3134
3135 href = xmlParseQuotedString(ctxt);
3136 SKIP_BLANKS;
3137 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3138 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3139 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3140 garbage = 0;
3141 SKIP(6);
3142 SKIP_BLANKS;
3143
3144 if (RAW != '=') continue;
3145 NEXT;
3146 SKIP_BLANKS;
3147
3148 prefix = xmlParseQuotedString(ctxt);
3149 SKIP_BLANKS;
3150 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3151 garbage = 0;
3152 SKIP(2);
3153 SKIP_BLANKS;
3154
3155 if (RAW != '=') continue;
3156 NEXT;
3157 SKIP_BLANKS;
3158
3159 prefix = xmlParseQuotedString(ctxt);
3160 SKIP_BLANKS;
3161 } else if ((RAW == '?') && (NXT(1) == '>')) {
3162 garbage = 0;
3163 NEXT;
3164 } else {
3165 /*
3166 * Found garbage when parsing the namespace
3167 */
3168 if (!garbage) {
3169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3170 ctxt->sax->error(ctxt->userData,
3171 "xmlParseNamespace found garbage\n");
3172 }
3173 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3174 ctxt->wellFormed = 0;
3175 ctxt->disableSAX = 1;
3176 NEXT;
3177 }
3178 }
3179
3180 MOVETO_ENDTAG(CUR_PTR);
3181 NEXT;
3182
3183 /*
3184 * Register the DTD.
3185 if (href != NULL)
3186 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3187 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3188 */
3189
3190 if (prefix != NULL) xmlFree(prefix);
3191 if (href != NULL) xmlFree(href);
3192#endif
3193}
3194
3195/**
3196 * xmlScanName:
3197 * @ctxt: an XML parser context
3198 *
3199 * Trickery: parse an XML name but without consuming the input flow
3200 * Needed for rollback cases. Used only when parsing entities references.
3201 *
3202 * TODO: seems deprecated now, only used in the default part of
3203 * xmlParserHandleReference
3204 *
3205 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3206 * CombiningChar | Extender
3207 *
3208 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3209 *
3210 * [6] Names ::= Name (S Name)*
3211 *
3212 * Returns the Name parsed or NULL
3213 */
3214
3215xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003216xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003217 static int deprecated = 0;
3218 if (!deprecated) {
3219 xmlGenericError(xmlGenericErrorContext,
3220 "xmlScanName() deprecated function reached\n");
3221 deprecated = 1;
3222 }
3223 return(NULL);
3224
3225#if 0
3226 xmlChar buf[XML_MAX_NAMELEN];
3227 int len = 0;
3228
3229 GROW;
3230 if (!IS_LETTER(RAW) && (RAW != '_') &&
3231 (RAW != ':')) {
3232 return(NULL);
3233 }
3234
3235
3236 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3237 (NXT(len) == '.') || (NXT(len) == '-') ||
3238 (NXT(len) == '_') || (NXT(len) == ':') ||
3239 (IS_COMBINING(NXT(len))) ||
3240 (IS_EXTENDER(NXT(len)))) {
3241 GROW;
3242 buf[len] = NXT(len);
3243 len++;
3244 if (len >= XML_MAX_NAMELEN) {
3245 xmlGenericError(xmlGenericErrorContext,
3246 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3247 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3248 (IS_DIGIT(NXT(len))) ||
3249 (NXT(len) == '.') || (NXT(len) == '-') ||
3250 (NXT(len) == '_') || (NXT(len) == ':') ||
3251 (IS_COMBINING(NXT(len))) ||
3252 (IS_EXTENDER(NXT(len))))
3253 len++;
3254 break;
3255 }
3256 }
3257 return(xmlStrndup(buf, len));
3258#endif
3259}
3260
3261/**
3262 * xmlParserHandleReference:
3263 * @ctxt: the parser context
3264 *
3265 * TODO: Remove, now deprecated ... the test is done directly in the
3266 * content parsing
3267 * routines.
3268 *
3269 * [67] Reference ::= EntityRef | CharRef
3270 *
3271 * [68] EntityRef ::= '&' Name ';'
3272 *
3273 * [ WFC: Entity Declared ]
3274 * the Name given in the entity reference must match that in an entity
3275 * declaration, except that well-formed documents need not declare any
3276 * of the following entities: amp, lt, gt, apos, quot.
3277 *
3278 * [ WFC: Parsed Entity ]
3279 * An entity reference must not contain the name of an unparsed entity
3280 *
3281 * [66] CharRef ::= '&#' [0-9]+ ';' |
3282 * '&#x' [0-9a-fA-F]+ ';'
3283 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003284 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003285 * the handling is done accordingly to
3286 * http://www.w3.org/TR/REC-xml#entproc
3287 */
3288void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003289xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003290 static int deprecated = 0;
3291 if (!deprecated) {
3292 xmlGenericError(xmlGenericErrorContext,
3293 "xmlParserHandleReference() deprecated function reached\n");
3294 deprecated = 1;
3295 }
3296
3297#if 0
3298 xmlParserInputPtr input;
3299 xmlChar *name;
3300 xmlEntityPtr ent = NULL;
3301
3302 if (ctxt->token != 0) {
3303 return;
3304 }
3305 if (RAW != '&') return;
3306 GROW;
3307 if ((RAW == '&') && (NXT(1) == '#')) {
3308 switch(ctxt->instate) {
3309 case XML_PARSER_ENTITY_DECL:
3310 case XML_PARSER_PI:
3311 case XML_PARSER_CDATA_SECTION:
3312 case XML_PARSER_COMMENT:
3313 case XML_PARSER_SYSTEM_LITERAL:
3314 /* we just ignore it there */
3315 return;
3316 case XML_PARSER_START_TAG:
3317 return;
3318 case XML_PARSER_END_TAG:
3319 return;
3320 case XML_PARSER_EOF:
3321 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3323 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3324 ctxt->wellFormed = 0;
3325 ctxt->disableSAX = 1;
3326 return;
3327 case XML_PARSER_PROLOG:
3328 case XML_PARSER_START:
3329 case XML_PARSER_MISC:
3330 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3333 ctxt->wellFormed = 0;
3334 ctxt->disableSAX = 1;
3335 return;
3336 case XML_PARSER_EPILOG:
3337 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3339 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3340 ctxt->wellFormed = 0;
3341 ctxt->disableSAX = 1;
3342 return;
3343 case XML_PARSER_DTD:
3344 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3346 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003347 "CharRef are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003348 ctxt->wellFormed = 0;
3349 ctxt->disableSAX = 1;
3350 return;
3351 case XML_PARSER_ENTITY_VALUE:
3352 /*
3353 * NOTE: in the case of entity values, we don't do the
3354 * substitution here since we need the literal
3355 * entity value to be able to save the internal
3356 * subset of the document.
3357 * This will be handled by xmlStringDecodeEntities
3358 */
3359 return;
3360 case XML_PARSER_CONTENT:
3361 return;
3362 case XML_PARSER_ATTRIBUTE_VALUE:
3363 /* ctxt->token = xmlParseCharRef(ctxt); */
3364 return;
3365 case XML_PARSER_IGNORE:
3366 return;
3367 }
3368 return;
3369 }
3370
3371 switch(ctxt->instate) {
3372 case XML_PARSER_CDATA_SECTION:
3373 return;
3374 case XML_PARSER_PI:
3375 case XML_PARSER_COMMENT:
3376 case XML_PARSER_SYSTEM_LITERAL:
3377 case XML_PARSER_CONTENT:
3378 return;
3379 case XML_PARSER_START_TAG:
3380 return;
3381 case XML_PARSER_END_TAG:
3382 return;
3383 case XML_PARSER_EOF:
3384 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3386 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3387 ctxt->wellFormed = 0;
3388 ctxt->disableSAX = 1;
3389 return;
3390 case XML_PARSER_PROLOG:
3391 case XML_PARSER_START:
3392 case XML_PARSER_MISC:
3393 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3395 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3396 ctxt->wellFormed = 0;
3397 ctxt->disableSAX = 1;
3398 return;
3399 case XML_PARSER_EPILOG:
3400 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3402 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3403 ctxt->wellFormed = 0;
3404 ctxt->disableSAX = 1;
3405 return;
3406 case XML_PARSER_ENTITY_VALUE:
3407 /*
3408 * NOTE: in the case of entity values, we don't do the
3409 * substitution here since we need the literal
3410 * entity value to be able to save the internal
3411 * subset of the document.
3412 * This will be handled by xmlStringDecodeEntities
3413 */
3414 return;
3415 case XML_PARSER_ATTRIBUTE_VALUE:
3416 /*
3417 * NOTE: in the case of attributes values, we don't do the
3418 * substitution here unless we are in a mode where
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003419 * the parser is explicitly asked to substitute
Owen Taylor3473f882001-02-23 17:55:21 +00003420 * entities. The SAX callback is called with values
3421 * without entity substitution.
3422 * This will then be handled by xmlStringDecodeEntities
3423 */
3424 return;
3425 case XML_PARSER_ENTITY_DECL:
3426 /*
3427 * we just ignore it there
3428 * the substitution will be done once the entity is referenced
3429 */
3430 return;
3431 case XML_PARSER_DTD:
3432 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3434 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003435 "Entity references are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003436 ctxt->wellFormed = 0;
3437 ctxt->disableSAX = 1;
3438 return;
3439 case XML_PARSER_IGNORE:
3440 return;
3441 }
3442
3443/* TODO: this seems not reached anymore .... Verify ... */
3444xmlGenericError(xmlGenericErrorContext,
3445 "Reached deprecated section in xmlParserHandleReference()\n");
3446xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003447 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003448xmlGenericError(xmlGenericErrorContext,
3449 "indicating the version: %s, thanks !\n", xmlParserVersion);
3450 NEXT;
3451 name = xmlScanName(ctxt);
3452 if (name == NULL) {
3453 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3455 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3456 ctxt->wellFormed = 0;
3457 ctxt->disableSAX = 1;
3458 ctxt->token = '&';
3459 return;
3460 }
3461 if (NXT(xmlStrlen(name)) != ';') {
3462 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3464 ctxt->sax->error(ctxt->userData,
3465 "Entity reference: ';' expected\n");
3466 ctxt->wellFormed = 0;
3467 ctxt->disableSAX = 1;
3468 ctxt->token = '&';
3469 xmlFree(name);
3470 return;
3471 }
3472 SKIP(xmlStrlen(name) + 1);
3473 if (ctxt->sax != NULL) {
3474 if (ctxt->sax->getEntity != NULL)
3475 ent = ctxt->sax->getEntity(ctxt->userData, name);
3476 }
3477
3478 /*
3479 * [ WFC: Entity Declared ]
3480 * the Name given in the entity reference must match that in an entity
3481 * declaration, except that well-formed documents need not declare any
3482 * of the following entities: amp, lt, gt, apos, quot.
3483 */
3484 if (ent == NULL)
3485 ent = xmlGetPredefinedEntity(name);
3486 if (ent == NULL) {
3487 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3489 ctxt->sax->error(ctxt->userData,
3490 "Entity reference: entity %s not declared\n",
3491 name);
3492 ctxt->wellFormed = 0;
3493 ctxt->disableSAX = 1;
3494 xmlFree(name);
3495 return;
3496 }
3497
3498 /*
3499 * [ WFC: Parsed Entity ]
3500 * An entity reference must not contain the name of an unparsed entity
3501 */
3502 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3503 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3505 ctxt->sax->error(ctxt->userData,
3506 "Entity reference to unparsed entity %s\n", name);
3507 ctxt->wellFormed = 0;
3508 ctxt->disableSAX = 1;
3509 }
3510
3511 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3512 ctxt->token = ent->content[0];
3513 xmlFree(name);
3514 return;
3515 }
3516 input = xmlNewEntityInputStream(ctxt, ent);
3517 xmlPushInput(ctxt, input);
3518 xmlFree(name);
3519#endif
3520 return;
3521}
3522
3523/**
3524 * xmlHandleEntity:
3525 * @ctxt: an XML parser context
3526 * @entity: an XML entity pointer.
3527 *
3528 * Default handling of defined entities, when should we define a new input
3529 * stream ? When do we just handle that as a set of chars ?
3530 *
3531 * OBSOLETE: to be removed at some point.
3532 */
3533
3534void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003535xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003536 static int deprecated = 0;
3537 if (!deprecated) {
3538 xmlGenericError(xmlGenericErrorContext,
3539 "xmlHandleEntity() deprecated function reached\n");
3540 deprecated = 1;
3541 }
3542
3543#if 0
3544 int len;
3545 xmlParserInputPtr input;
3546
3547 if (entity->content == NULL) {
3548 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3550 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3551 entity->name);
3552 ctxt->wellFormed = 0;
3553 ctxt->disableSAX = 1;
3554 return;
3555 }
3556 len = xmlStrlen(entity->content);
3557 if (len <= 2) goto handle_as_char;
3558
3559 /*
3560 * Redefine its content as an input stream.
3561 */
3562 input = xmlNewEntityInputStream(ctxt, entity);
3563 xmlPushInput(ctxt, input);
3564 return;
3565
3566handle_as_char:
3567 /*
3568 * Just handle the content as a set of chars.
3569 */
3570 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3571 (ctxt->sax->characters != NULL))
3572 ctxt->sax->characters(ctxt->userData, entity->content, len);
3573#endif
3574}
3575
3576/**
3577 * xmlNewGlobalNs:
3578 * @doc: the document carrying the namespace
3579 * @href: the URI associated
3580 * @prefix: the prefix for the namespace
3581 *
3582 * Creation of a Namespace, the old way using PI and without scoping
3583 * DEPRECATED !!!
3584 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003585 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003586 */
3587xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003588xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3589 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003590 static int deprecated = 0;
3591 if (!deprecated) {
3592 xmlGenericError(xmlGenericErrorContext,
3593 "xmlNewGlobalNs() deprecated function reached\n");
3594 deprecated = 1;
3595 }
3596 return(NULL);
3597#if 0
3598 xmlNodePtr root;
3599
3600 xmlNsPtr cur;
3601
3602 root = xmlDocGetRootElement(doc);
3603 if (root != NULL)
3604 return(xmlNewNs(root, href, prefix));
3605
3606 /*
3607 * if there is no root element yet, create an old Namespace type
3608 * and it will be moved to the root at save time.
3609 */
3610 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3611 if (cur == NULL) {
3612 xmlGenericError(xmlGenericErrorContext,
3613 "xmlNewGlobalNs : malloc failed\n");
3614 return(NULL);
3615 }
3616 memset(cur, 0, sizeof(xmlNs));
3617 cur->type = XML_GLOBAL_NAMESPACE;
3618
3619 if (href != NULL)
3620 cur->href = xmlStrdup(href);
3621 if (prefix != NULL)
3622 cur->prefix = xmlStrdup(prefix);
3623
3624 /*
3625 * Add it at the end to preserve parsing order ...
3626 */
3627 if (doc != NULL) {
3628 if (doc->oldNs == NULL) {
3629 doc->oldNs = cur;
3630 } else {
3631 xmlNsPtr prev = doc->oldNs;
3632
3633 while (prev->next != NULL) prev = prev->next;
3634 prev->next = cur;
3635 }
3636 }
3637
3638 return(NULL);
3639#endif
3640}
3641
3642/**
3643 * xmlUpgradeOldNs:
3644 * @doc: a document pointer
3645 *
3646 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3647 * DEPRECATED
3648 */
3649void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003650xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003651 static int deprecated = 0;
3652 if (!deprecated) {
3653 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003654 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003655 deprecated = 1;
3656 }
3657#if 0
3658 xmlNsPtr cur;
3659
3660 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3661 if (doc->children == NULL) {
3662#ifdef DEBUG_TREE
3663 xmlGenericError(xmlGenericErrorContext,
3664 "xmlUpgradeOldNs: failed no root !\n");
3665#endif
3666 return;
3667 }
3668
3669 cur = doc->oldNs;
3670 while (cur->next != NULL) {
3671 cur->type = XML_LOCAL_NAMESPACE;
3672 cur = cur->next;
3673 }
3674 cur->type = XML_LOCAL_NAMESPACE;
3675 cur->next = doc->children->nsDef;
3676 doc->children->nsDef = doc->oldNs;
3677 doc->oldNs = NULL;
3678#endif
3679}
3680