blob: 4af5d717aff51ff8ce9b0deeb5e377e8d6c68dbe [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Daniel Veillard3c5ed912002-01-08 10:36:16 +000012#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000053#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000054
Daniel Veillard56a4cb82001-03-24 17:00:36 +000055void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000056
Daniel Veillarda53c6882001-07-25 17:18:57 +000057/*
58 * Various global defaults for parsing
59 */
Daniel Veillarda53c6882001-07-25 17:18:57 +000060#ifdef VMS
61int xmlSubstituteEntitiesDefaultVal = 0;
62#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
63int xmlDoValidityCheckingDefaultVal = 0;
64#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
Daniel Veillarda53c6882001-07-25 17:18:57 +000065#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066
Daniel Veillard5e2dace2001-07-18 19:30:27 +000067/**
Owen Taylor3473f882001-02-23 17:55:21 +000068 * xmlCheckVersion:
69 * @version: the include version number
70 *
71 * check the compiled lib version against the include one.
72 * This can warn or immediately kill the application
73 */
74void
75xmlCheckVersion(int version) {
76 int myversion = (int) LIBXML_VERSION;
77
Daniel Veillard6f350292001-10-14 09:56:15 +000078 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000079
Owen Taylor3473f882001-02-23 17:55:21 +000080 if ((myversion / 10000) != (version / 10000)) {
81 xmlGenericError(xmlGenericErrorContext,
82 "Fatal: program compiled against libxml %d using libxml %d\n",
83 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000084 fprintf(stderr,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000087 }
88 if ((myversion / 100) < (version / 100)) {
89 xmlGenericError(xmlGenericErrorContext,
90 "Warning: program compiled against libxml %d using older %d\n",
91 (version / 100), (myversion / 100));
92 }
93}
94
95
/*
 * Feature names reported by xmlGetFeaturesList(), in the order in which
 * they are copied out.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  in: capacity of @result; out: number of names stored in it
 * @result:  array receiving pointers to the feature names
 *
 * Copy at most *@len feature names into the @result array. The stored
 * pointers refer to static strings and must not be deallocated.
 *
 * Returns the total number of features (also when @len or @result is
 * NULL, in which case nothing is copied), or -1 if *@len is negative
 * or >= 1000.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* Clamp the request to what is available and report it back. */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
167
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000168/**
Owen Taylor3473f882001-02-23 17:55:21 +0000169 * xmlGetFeature:
170 * @ctxt: an XML/HTML parser context
171 * @name: the feature name
172 * @result: location to store the result
173 *
174 * Read the current value of one feature of this parser instance
175 *
176 * Returns -1 in case or error, 0 otherwise
177 */
178int
179xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181 return(-1);
182
183 if (!strcmp(name, "validate")) {
184 *((int *) result) = ctxt->validate;
185 } else if (!strcmp(name, "keep blanks")) {
186 *((int *) result) = ctxt->keepBlanks;
187 } else if (!strcmp(name, "disable SAX")) {
188 *((int *) result) = ctxt->disableSAX;
189 } else if (!strcmp(name, "fetch external entities")) {
190 *((int *) result) = ctxt->loadsubset;
191 } else if (!strcmp(name, "substitute entities")) {
192 *((int *) result) = ctxt->replaceEntities;
193 } else if (!strcmp(name, "gather line info")) {
194 *((int *) result) = ctxt->record_info;
195 } else if (!strcmp(name, "user data")) {
196 *((void **)result) = ctxt->userData;
197 } else if (!strcmp(name, "is html")) {
198 *((int *) result) = ctxt->html;
199 } else if (!strcmp(name, "is standalone")) {
200 *((int *) result) = ctxt->standalone;
201 } else if (!strcmp(name, "document")) {
202 *((xmlDocPtr *) result) = ctxt->myDoc;
203 } else if (!strcmp(name, "is well formed")) {
204 *((int *) result) = ctxt->wellFormed;
205 } else if (!strcmp(name, "is valid")) {
206 *((int *) result) = ctxt->valid;
207 } else if (!strcmp(name, "SAX block")) {
208 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209 } else if (!strcmp(name, "SAX function internalSubset")) {
210 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211 } else if (!strcmp(name, "SAX function isStandalone")) {
212 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217 } else if (!strcmp(name, "SAX function resolveEntity")) {
218 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219 } else if (!strcmp(name, "SAX function getEntity")) {
220 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221 } else if (!strcmp(name, "SAX function entityDecl")) {
222 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223 } else if (!strcmp(name, "SAX function notationDecl")) {
224 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225 } else if (!strcmp(name, "SAX function attributeDecl")) {
226 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227 } else if (!strcmp(name, "SAX function elementDecl")) {
228 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233 } else if (!strcmp(name, "SAX function startDocument")) {
234 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235 } else if (!strcmp(name, "SAX function endDocument")) {
236 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237 } else if (!strcmp(name, "SAX function startElement")) {
238 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239 } else if (!strcmp(name, "SAX function endElement")) {
240 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241 } else if (!strcmp(name, "SAX function reference")) {
242 *((referenceSAXFunc *) result) = ctxt->sax->reference;
243 } else if (!strcmp(name, "SAX function characters")) {
244 *((charactersSAXFunc *) result) = ctxt->sax->characters;
245 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247 } else if (!strcmp(name, "SAX function processingInstruction")) {
248 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249 } else if (!strcmp(name, "SAX function comment")) {
250 *((commentSAXFunc *) result) = ctxt->sax->comment;
251 } else if (!strcmp(name, "SAX function warning")) {
252 *((warningSAXFunc *) result) = ctxt->sax->warning;
253 } else if (!strcmp(name, "SAX function error")) {
254 *((errorSAXFunc *) result) = ctxt->sax->error;
255 } else if (!strcmp(name, "SAX function fatalError")) {
256 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257 } else if (!strcmp(name, "SAX function getParameterEntity")) {
258 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259 } else if (!strcmp(name, "SAX function cdataBlock")) {
260 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261 } else if (!strcmp(name, "SAX function externalSubset")) {
262 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263 } else {
264 return(-1);
265 }
266 return(0);
267}
268
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000269/**
Owen Taylor3473f882001-02-23 17:55:21 +0000270 * xmlSetFeature:
271 * @ctxt: an XML/HTML parser context
272 * @name: the feature name
273 * @value: pointer to the location of the new value
274 *
275 * Change the current value of one feature of this parser instance
276 *
277 * Returns -1 in case or error, 0 otherwise
278 */
279int
280xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282 return(-1);
283
284 if (!strcmp(name, "validate")) {
285 int newvalidate = *((int *) value);
286 if ((!ctxt->validate) && (newvalidate != 0)) {
287 if (ctxt->vctxt.warning == NULL)
288 ctxt->vctxt.warning = xmlParserValidityWarning;
289 if (ctxt->vctxt.error == NULL)
290 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000291 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000292 }
293 ctxt->validate = newvalidate;
294 } else if (!strcmp(name, "keep blanks")) {
295 ctxt->keepBlanks = *((int *) value);
296 } else if (!strcmp(name, "disable SAX")) {
297 ctxt->disableSAX = *((int *) value);
298 } else if (!strcmp(name, "fetch external entities")) {
299 ctxt->loadsubset = *((int *) value);
300 } else if (!strcmp(name, "substitute entities")) {
301 ctxt->replaceEntities = *((int *) value);
302 } else if (!strcmp(name, "gather line info")) {
303 ctxt->record_info = *((int *) value);
304 } else if (!strcmp(name, "user data")) {
305 ctxt->userData = *((void **)value);
306 } else if (!strcmp(name, "is html")) {
307 ctxt->html = *((int *) value);
308 } else if (!strcmp(name, "is standalone")) {
309 ctxt->standalone = *((int *) value);
310 } else if (!strcmp(name, "document")) {
311 ctxt->myDoc = *((xmlDocPtr *) value);
312 } else if (!strcmp(name, "is well formed")) {
313 ctxt->wellFormed = *((int *) value);
314 } else if (!strcmp(name, "is valid")) {
315 ctxt->valid = *((int *) value);
316 } else if (!strcmp(name, "SAX block")) {
317 ctxt->sax = *((xmlSAXHandlerPtr *) value);
318 } else if (!strcmp(name, "SAX function internalSubset")) {
319 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function isStandalone")) {
321 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
323 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
325 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function resolveEntity")) {
327 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function getEntity")) {
329 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
330 } else if (!strcmp(name, "SAX function entityDecl")) {
331 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function notationDecl")) {
333 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function attributeDecl")) {
335 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function elementDecl")) {
337 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
339 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
341 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startDocument")) {
343 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endDocument")) {
345 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function startElement")) {
347 ctxt->sax->startElement = *((startElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function endElement")) {
349 ctxt->sax->endElement = *((endElementSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function reference")) {
351 ctxt->sax->reference = *((referenceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function characters")) {
353 ctxt->sax->characters = *((charactersSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
355 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function processingInstruction")) {
357 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function comment")) {
359 ctxt->sax->comment = *((commentSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function warning")) {
361 ctxt->sax->warning = *((warningSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function error")) {
363 ctxt->sax->error = *((errorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function fatalError")) {
365 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function getParameterEntity")) {
367 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
368 } else if (!strcmp(name, "SAX function cdataBlock")) {
369 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
370 } else if (!strcmp(name, "SAX function externalSubset")) {
371 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
372 } else {
373 return(-1);
374 }
375 return(0);
376}
377
378/************************************************************************
379 * *
380 * Some functions to avoid too large macros *
381 * *
382 ************************************************************************/
383
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE, and
 * FFFF. Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space character only TAB, LF and CR are allowed. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* Surrogate block. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF. */
    if (c < 0x10000)
        return(0);
    return(c <= 0x10FFFF);
}
404
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
419
/*
 * Direct lookup table for the code points 0x00 - 0xFF: non-zero when the
 * code point is a BaseChar.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * BaseChar ranges for code points >= 0x0100, as inclusive {first, last}
 * pairs. The table MUST stay sorted and non-overlapping: it is searched
 * by bisection.
 */
static const struct {
    int lo;
    int hi;
} xmlBaseCharRanges[] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray; higher ones
 * are looked up by bisection in xmlBaseCharRanges. Negative values are
 * never a BaseChar and are rejected before the array is indexed.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid].lo)
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid].hi)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
658
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Non-ASCII digit blocks as inclusive {first, last}, ascending. */
    static const int digitBlocks[][2] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
        {0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF},
        {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
        {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int nbBlocks = (int) (sizeof(digitBlocks) / sizeof(digitBlocks[0]));
    int idx;

    /* ASCII '0'..'9' */
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);
    /* Nothing else below the first non-ASCII block can match. */
    if (c < 0x0660)
        return(0);

    for (idx = 0; idx < nbBlocks; idx++) {
        if (c < digitBlocks[idx][0])
            return(0);
        if (c <= digitBlocks[idx][1])
            return(1);
    }
    return(0);
}
688
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* CombiningChar ranges as inclusive {first, last}, ascending. */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486},
        {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD},
        {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4},
        {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
        {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C},
        {0x093E, 0x094C}, {0x094D, 0x094D}, {0x0951, 0x0954},
        {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4},
        {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
        {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C},
        {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5},
        {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
        {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
        {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44},
        {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03},
        {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},
        {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
        {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
        {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97},
        {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F},
        {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    const int nbRanges =
        (int) (sizeof(combiningRanges) / sizeof(combiningRanges[0]));
    int idx;

    /* Nothing below the first range can match. */
    if (c < 0x0300)
        return(0);

    for (idx = 0; idx < nbRanges; idx++) {
        /* Table is ascending: once past c, no later range can match. */
        if (c < combiningRanges[idx][0])
            return(0);
        if (c <= combiningRanges[idx][1])
            return(1);
    }
    return(0);
}
801
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE]: every member of the range, 0x30FD included */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
826
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; code
 * points outside them (for instance #xF900-#xFA2D) are not Ideographic.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* quick rejection: nothing below 0x0100 belongs to the production */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
844
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* BaseChar alternative */
    if (IS_BASECHAR(c))
        return(1);
    /* Ideographic alternative */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
858
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* #x20 | #xD | #xA */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
881
882/************************************************************************
883 * *
884 * Input handling functions for progressive parsing *
885 * *
886 ************************************************************************/
887
888/* #define DEBUG_INPUT */
889/* #define DEBUG_STACK */
890/* #define DEBUG_PUSH */
891
892
893/* we need to keep enough input to show errors in context */
894#define LINE_LEN 80
895
896#ifdef DEBUG_INPUT
897#define CHECK_BUFFER(in) check_buffer(in)
898
899void check_buffer(xmlParserInputPtr in) {
900 if (in->base != in->buf->buffer->content) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: base mismatch problem\n");
903 }
904 if (in->cur < in->base) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur < base problem\n");
907 }
908 if (in->cur > in->base + in->buf->buffer->use) {
909 xmlGenericError(xmlGenericErrorContext,
910 "xmlParserInput: cur > base + use problem\n");
911 }
912 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
913 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
914 in->buf->buffer->use, in->buf->buffer->size);
915}
916
917#else
918#define CHECK_BUFFER(in)
919#endif
920
921
922/**
923 * xmlParserInputRead:
924 * @in: an XML parser input
925 * @len: an indicative size for the lookahead
926 *
927 * This function refresh the input for the parser. It doesn't try to
928 * preserve pointers to the input buffer, and discard already read data
929 *
930 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
931 * end of this entity
932 */
933int
934xmlParserInputRead(xmlParserInputPtr in, int len) {
935 int ret;
936 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000937 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000938
939#ifdef DEBUG_INPUT
940 xmlGenericError(xmlGenericErrorContext, "Read\n");
941#endif
942 if (in->buf == NULL) return(-1);
943 if (in->base == NULL) return(-1);
944 if (in->cur == NULL) return(-1);
945 if (in->buf->buffer == NULL) return(-1);
946 if (in->buf->readcallback == NULL) return(-1);
947
948 CHECK_BUFFER(in);
949
950 used = in->cur - in->buf->buffer->content;
951 ret = xmlBufferShrink(in->buf->buffer, used);
952 if (ret > 0) {
953 in->cur -= ret;
954 in->consumed += ret;
955 }
956 ret = xmlParserInputBufferRead(in->buf, len);
957 if (in->base != in->buf->buffer->content) {
958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000959 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000960 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000961 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000962 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000963 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000964 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000965 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000966
967 CHECK_BUFFER(in);
968
969 return(ret);
970}
971
972/**
973 * xmlParserInputGrow:
974 * @in: an XML parser input
975 * @len: an indicative size for the lookahead
976 *
977 * This function increase the input for the parser. It tries to
978 * preserve pointers to the input buffer, and keep already read data
979 *
980 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
981 * end of this entity
982 */
983int
984xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000986 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000987
988#ifdef DEBUG_INPUT
989 xmlGenericError(xmlGenericErrorContext, "Grow\n");
990#endif
991 if (in->buf == NULL) return(-1);
992 if (in->base == NULL) return(-1);
993 if (in->cur == NULL) return(-1);
994 if (in->buf->buffer == NULL) return(-1);
995
996 CHECK_BUFFER(in);
997
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000998 indx = in->cur - in->base;
999 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001000
1001 CHECK_BUFFER(in);
1002
1003 return(0);
1004 }
1005 if (in->buf->readcallback != NULL)
1006 ret = xmlParserInputBufferGrow(in->buf, len);
1007 else
1008 return(0);
1009
1010 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001011 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001012 * block, but we use it really as an integer to do some
1013 * pointer arithmetic. Insure will raise it as a bug but in
1014 * that specific case, that's not !
1015 */
1016 if (in->base != in->buf->buffer->content) {
1017 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001018 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001019 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001020 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001021 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001022 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001023 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001024 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001025
1026 CHECK_BUFFER(in);
1027
1028 return(ret);
1029}
1030
1031/**
1032 * xmlParserInputShrink:
1033 * @in: an XML parser input
1034 *
1035 * This function removes used input for the parser.
1036 */
1037void
1038xmlParserInputShrink(xmlParserInputPtr in) {
1039 int used;
1040 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001041 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001042
1043#ifdef DEBUG_INPUT
1044 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1045#endif
1046 if (in->buf == NULL) return;
1047 if (in->base == NULL) return;
1048 if (in->cur == NULL) return;
1049 if (in->buf->buffer == NULL) return;
1050
1051 CHECK_BUFFER(in);
1052
1053 used = in->cur - in->buf->buffer->content;
1054 /*
1055 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001056 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001057 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001058 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001059 return;
1060 if (used > INPUT_CHUNK) {
1061 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1062 if (ret > 0) {
1063 in->cur -= ret;
1064 in->consumed += ret;
1065 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001066 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001067 }
1068
1069 CHECK_BUFFER(in);
1070
1071 if (in->buf->buffer->use > INPUT_CHUNK) {
1072 return;
1073 }
1074 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1075 if (in->base != in->buf->buffer->content) {
1076 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001077 * the buffer has been ereallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001080 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001083 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001084
1085 CHECK_BUFFER(in);
1086}
1087
1088/************************************************************************
1089 * *
1090 * UTF8 character input and related functions *
1091 * *
1092 ************************************************************************/
1093
1094/**
1095 * xmlNextChar:
1096 * @ctxt: the XML parser context
1097 *
1098 * Skip to the next char input char.
1099 */
1100
1101void
1102xmlNextChar(xmlParserCtxtPtr ctxt) {
1103 if (ctxt->instate == XML_PARSER_EOF)
1104 return;
1105
1106 /*
1107 * 2.11 End-of-Line Handling
1108 * the literal two-character sequence "#xD#xA" or a standalone
1109 * literal #xD, an XML processor must pass to the application
1110 * the single character #xA.
1111 */
1112 if (ctxt->token != 0) ctxt->token = 0;
1113 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1114 if ((*ctxt->input->cur == 0) &&
1115 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1116 (ctxt->instate != XML_PARSER_COMMENT)) {
1117 /*
1118 * If we are at the end of the current entity and
1119 * the context allows it, we pop consumed entities
1120 * automatically.
1121 * the auto closing should be blocked in other cases
1122 */
1123 xmlPopInput(ctxt);
1124 } else {
1125 if (*(ctxt->input->cur) == '\n') {
1126 ctxt->input->line++; ctxt->input->col = 1;
1127 } else ctxt->input->col++;
1128 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 const unsigned char *cur = ctxt->input->cur;
1141 unsigned char c;
1142
1143 c = *cur;
1144 if (c & 0x80) {
1145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
1152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
1155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
1157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
1160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n", val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 ctxt->disableSAX = 1;
1185 }
1186 } else
1187 /* 2-byte code */
1188 ctxt->input->cur += 2;
1189 } else
1190 /* 1-byte code */
1191 ctxt->input->cur++;
1192 } else {
1193 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001194 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001195 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001196 * XML constructs only use < 128 chars
1197 */
1198 ctxt->input->cur++;
1199 }
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 } else {
1205 ctxt->input->cur++;
1206 ctxt->nbChars++;
1207 if (*ctxt->input->cur == 0)
1208 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1209 }
1210 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1211 xmlParserHandlePEReference(ctxt);
1212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
1215 return;
1216encoding_error:
1217 /*
1218 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001219 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001220 * declaration header. Report the error and switch the encoding
1221 * to ISO-Latin-1 (if you don't like this policy, just declare the
1222 * encoding !)
1223 */
1224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1225 ctxt->sax->error(ctxt->userData,
1226 "Input is not proper UTF-8, indicate encoding !\n");
1227 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1228 ctxt->input->cur[0], ctxt->input->cur[1],
1229 ctxt->input->cur[2], ctxt->input->cur[3]);
1230 }
1231 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1232
1233 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1234 ctxt->input->cur++;
1235 return;
1236}
1237
1238/**
1239 * xmlCurrentChar:
1240 * @ctxt: the XML parser context
1241 * @len: pointer to the length of the char read
1242 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001243 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001244 * bytes in the input buffer. Implement the end of line normalization:
1245 * 2.11 End-of-Line Handling
1246 * Wherever an external parsed entity or the literal entity value
1247 * of an internal parsed entity contains either the literal two-character
1248 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1249 * must pass to the application the single character #xA.
1250 * This behavior can conveniently be produced by normalizing all
1251 * line breaks to #xA on input, before parsing.)
1252 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001253 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001254 */
1255
1256int
1257xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1258 if (ctxt->instate == XML_PARSER_EOF)
1259 return(0);
1260
1261 if (ctxt->token != 0) {
1262 *len = 0;
1263 return(ctxt->token);
1264 }
1265 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1266 *len = 1;
1267 return((int) *ctxt->input->cur);
1268 }
1269 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1270 /*
1271 * We are supposed to handle UTF8, check it's valid
1272 * From rfc2044: encoding of the Unicode values on UTF-8:
1273 *
1274 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1275 * 0000 0000-0000 007F 0xxxxxxx
1276 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1277 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1278 *
1279 * Check for the 0x110000 limit too
1280 */
1281 const unsigned char *cur = ctxt->input->cur;
1282 unsigned char c;
1283 unsigned int val;
1284
1285 c = *cur;
1286 if (c & 0x80) {
1287 if (cur[1] == 0)
1288 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289 if ((cur[1] & 0xc0) != 0x80)
1290 goto encoding_error;
1291 if ((c & 0xe0) == 0xe0) {
1292
1293 if (cur[2] == 0)
1294 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1295 if ((cur[2] & 0xc0) != 0x80)
1296 goto encoding_error;
1297 if ((c & 0xf0) == 0xf0) {
1298 if (cur[3] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if (((c & 0xf8) != 0xf0) ||
1301 ((cur[3] & 0xc0) != 0x80))
1302 goto encoding_error;
1303 /* 4-byte code */
1304 *len = 4;
1305 val = (cur[0] & 0x7) << 18;
1306 val |= (cur[1] & 0x3f) << 12;
1307 val |= (cur[2] & 0x3f) << 6;
1308 val |= cur[3] & 0x3f;
1309 } else {
1310 /* 3-byte code */
1311 *len = 3;
1312 val = (cur[0] & 0xf) << 12;
1313 val |= (cur[1] & 0x3f) << 6;
1314 val |= cur[2] & 0x3f;
1315 }
1316 } else {
1317 /* 2-byte code */
1318 *len = 2;
1319 val = (cur[0] & 0x1f) << 6;
1320 val |= cur[1] & 0x3f;
1321 }
1322 if (!IS_CHAR(val)) {
1323 if ((ctxt->sax != NULL) &&
1324 (ctxt->sax->error != NULL))
1325 ctxt->sax->error(ctxt->userData,
1326 "Char 0x%X out of allowed range\n", val);
1327 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1328 ctxt->wellFormed = 0;
1329 ctxt->disableSAX = 1;
1330 }
1331 return(val);
1332 } else {
1333 /* 1-byte code */
1334 *len = 1;
1335 if (*ctxt->input->cur == 0xD) {
1336 if (ctxt->input->cur[1] == 0xA) {
1337 ctxt->nbChars++;
1338 ctxt->input->cur++;
1339 }
1340 return(0xA);
1341 }
1342 return((int) *ctxt->input->cur);
1343 }
1344 }
1345 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001346 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001347 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001348 * XML constructs only use < 128 chars
1349 */
1350 *len = 1;
1351 if (*ctxt->input->cur == 0xD) {
1352 if (ctxt->input->cur[1] == 0xA) {
1353 ctxt->nbChars++;
1354 ctxt->input->cur++;
1355 }
1356 return(0xA);
1357 }
1358 return((int) *ctxt->input->cur);
1359encoding_error:
1360 /*
1361 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001362 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001363 * declaration header. Report the error and switch the encoding
1364 * to ISO-Latin-1 (if you don't like this policy, just declare the
1365 * encoding !)
1366 */
1367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1368 ctxt->sax->error(ctxt->userData,
1369 "Input is not proper UTF-8, indicate encoding !\n");
1370 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1371 ctxt->input->cur[0], ctxt->input->cur[1],
1372 ctxt->input->cur[2], ctxt->input->cur[3]);
1373 }
1374 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1375
1376 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1377 *len = 1;
1378 return((int) *ctxt->input->cur);
1379}
1380
1381/**
1382 * xmlStringCurrentChar:
1383 * @ctxt: the XML parser context
1384 * @cur: pointer to the beginning of the char
1385 * @len: pointer to the length of the char read
1386 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001387 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001388 * bytes in the input buffer.
1389 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001390 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001391 */
1392
1393int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001394xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1395{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001396 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001397 /*
1398 * We are supposed to handle UTF8, check it's valid
1399 * From rfc2044: encoding of the Unicode values on UTF-8:
1400 *
1401 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1402 * 0000 0000-0000 007F 0xxxxxxx
1403 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1404 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1405 *
1406 * Check for the 0x110000 limit too
1407 */
1408 unsigned char c;
1409 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001410
Daniel Veillardd8224e02002-01-13 15:43:22 +00001411 c = *cur;
1412 if (c & 0x80) {
1413 if ((cur[1] & 0xc0) != 0x80)
1414 goto encoding_error;
1415 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001416
Daniel Veillardd8224e02002-01-13 15:43:22 +00001417 if ((cur[2] & 0xc0) != 0x80)
1418 goto encoding_error;
1419 if ((c & 0xf0) == 0xf0) {
1420 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1421 goto encoding_error;
1422 /* 4-byte code */
1423 *len = 4;
1424 val = (cur[0] & 0x7) << 18;
1425 val |= (cur[1] & 0x3f) << 12;
1426 val |= (cur[2] & 0x3f) << 6;
1427 val |= cur[3] & 0x3f;
1428 } else {
1429 /* 3-byte code */
1430 *len = 3;
1431 val = (cur[0] & 0xf) << 12;
1432 val |= (cur[1] & 0x3f) << 6;
1433 val |= cur[2] & 0x3f;
1434 }
1435 } else {
1436 /* 2-byte code */
1437 *len = 2;
1438 val = (cur[0] & 0x1f) << 6;
1439 val |= cur[1] & 0x3f;
1440 }
1441 if (!IS_CHAR(val)) {
1442 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1443 (ctxt->sax->error != NULL))
1444 ctxt->sax->error(ctxt->userData,
1445 "Char 0x%X out of allowed range\n",
1446 val);
1447 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1448 ctxt->wellFormed = 0;
1449 ctxt->disableSAX = 1;
1450 }
1451 return (val);
1452 } else {
1453 /* 1-byte code */
1454 *len = 1;
1455 return ((int) *cur);
1456 }
Owen Taylor3473f882001-02-23 17:55:21 +00001457 }
1458 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001459 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001460 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001461 * XML constructs only use < 128 chars
1462 */
1463 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001464 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001465encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001466
Owen Taylor3473f882001-02-23 17:55:21 +00001467 /*
1468 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001469 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001470 * declaration header. Report the error and switch the encoding
1471 * to ISO-Latin-1 (if you don't like this policy, just declare the
1472 * encoding !)
1473 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001474 if (ctxt != NULL) {
1475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1476 ctxt->sax->error(ctxt->userData,
1477 "Input is not proper UTF-8, indicate encoding !\n");
1478 ctxt->sax->error(ctxt->userData,
1479 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1480 ctxt->input->cur[0], ctxt->input->cur[1],
1481 ctxt->input->cur[2], ctxt->input->cur[3]);
1482 }
1483 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Owen Taylor3473f882001-02-23 17:55:21 +00001484 }
Owen Taylor3473f882001-02-23 17:55:21 +00001485
1486 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001487 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001488}
1489
1490/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001491 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001493 * @val: the char value
1494 *
1495 * append the char value in the array
1496 *
1497 * Returns the number of xmlChar written
1498 */
Owen Taylor3473f882001-02-23 17:55:21 +00001499int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001500xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * We are supposed to handle UTF8, check it's valid
1503 * From rfc2044: encoding of the Unicode values on UTF-8:
1504 *
1505 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1506 * 0000 0000-0000 007F 0xxxxxxx
1507 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1508 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1509 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001510 if (val >= 0x80) {
1511 xmlChar *savedout = out;
1512 int bits;
1513 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1514 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1515 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1516 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001517 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001519 val);
1520 return(0);
1521 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001522 for ( ; bits >= 0; bits-= 6)
1523 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1524 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001525 }
1526 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001527 return 1;
1528}
1529
1530/**
1531 * xmlCopyChar:
1532 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001533 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001534 * @val: the char value
1535 *
1536 * append the char value in the array
1537 *
1538 * Returns the number of xmlChar written
1539 */
1540
1541int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001542xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001543 /* the len parameter is ignored */
1544 if (val >= 0x80) {
1545 return(xmlCopyCharMultiByte (out, val));
1546 }
1547 *out = (xmlChar) val;
1548 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001549}
1550
1551/************************************************************************
1552 * *
1553 * Commodity functions to switch encodings *
1554 * *
1555 ************************************************************************/
1556
1557/**
1558 * xmlSwitchEncoding:
1559 * @ctxt: the parser context
1560 * @enc: the encoding value (number)
1561 *
1562 * change the input functions when discovering the character encoding
1563 * of a given entity.
1564 *
1565 * Returns 0 in case of success, -1 otherwise
1566 */
1567int
1568xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1569{
1570 xmlCharEncodingHandlerPtr handler;
1571
1572 switch (enc) {
1573 case XML_CHAR_ENCODING_ERROR:
1574 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1576 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1577 ctxt->wellFormed = 0;
1578 ctxt->disableSAX = 1;
1579 break;
1580 case XML_CHAR_ENCODING_NONE:
1581 /* let's assume it's UTF-8 without the XML decl */
1582 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1583 return(0);
1584 case XML_CHAR_ENCODING_UTF8:
1585 /* default encoding, no conversion should be needed */
1586 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001587
1588 /*
1589 * Errata on XML-1.0 June 20 2001
1590 * Specific handling of the Byte Order Mark for
1591 * UTF-8
1592 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001593 if ((ctxt->input != NULL) &&
1594 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001595 (ctxt->input->cur[1] == 0xBB) &&
1596 (ctxt->input->cur[2] == 0xBF)) {
1597 ctxt->input->cur += 3;
1598 }
Owen Taylor3473f882001-02-23 17:55:21 +00001599 return(0);
1600 default:
1601 break;
1602 }
1603 handler = xmlGetCharEncodingHandler(enc);
1604 if (handler == NULL) {
1605 /*
1606 * Default handlers.
1607 */
1608 switch (enc) {
1609 case XML_CHAR_ENCODING_ERROR:
1610 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1613 ctxt->wellFormed = 0;
1614 ctxt->disableSAX = 1;
1615 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1616 break;
1617 case XML_CHAR_ENCODING_NONE:
1618 /* let's assume it's UTF-8 without the XML decl */
1619 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1620 return(0);
1621 case XML_CHAR_ENCODING_UTF8:
1622 case XML_CHAR_ENCODING_ASCII:
1623 /* default encoding, no conversion should be needed */
1624 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1625 return(0);
1626 case XML_CHAR_ENCODING_UTF16LE:
1627 break;
1628 case XML_CHAR_ENCODING_UTF16BE:
1629 break;
1630 case XML_CHAR_ENCODING_UCS4LE:
1631 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633 ctxt->sax->error(ctxt->userData,
1634 "char encoding USC4 little endian not supported\n");
1635 break;
1636 case XML_CHAR_ENCODING_UCS4BE:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding USC4 big endian not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_EBCDIC:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding EBCDIC not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_UCS4_2143:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding UCS4 2143 not supported\n");
1653 break;
1654 case XML_CHAR_ENCODING_UCS4_3412:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding UCS4 3412 not supported\n");
1659 break;
1660 case XML_CHAR_ENCODING_UCS2:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding UCS2 not supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_8859_1:
1667 case XML_CHAR_ENCODING_8859_2:
1668 case XML_CHAR_ENCODING_8859_3:
1669 case XML_CHAR_ENCODING_8859_4:
1670 case XML_CHAR_ENCODING_8859_5:
1671 case XML_CHAR_ENCODING_8859_6:
1672 case XML_CHAR_ENCODING_8859_7:
1673 case XML_CHAR_ENCODING_8859_8:
1674 case XML_CHAR_ENCODING_8859_9:
1675 /*
1676 * We used to keep the internal content in the
1677 * document encoding however this turns being unmaintainable
1678 * So xmlGetCharEncodingHandler() will return non-null
1679 * values for this now.
1680 */
1681 if ((ctxt->inputNr == 1) &&
1682 (ctxt->encoding == NULL) &&
1683 (ctxt->input->encoding != NULL)) {
1684 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1685 }
1686 ctxt->charset = enc;
1687 return(0);
1688 case XML_CHAR_ENCODING_2022_JP:
1689 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1691 ctxt->sax->error(ctxt->userData,
1692 "char encoding ISO-2022-JPnot supported\n");
1693 break;
1694 case XML_CHAR_ENCODING_SHIFT_JIS:
1695 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697 ctxt->sax->error(ctxt->userData,
1698 "char encoding Shift_JIS not supported\n");
1699 break;
1700 case XML_CHAR_ENCODING_EUC_JP:
1701 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1703 ctxt->sax->error(ctxt->userData,
1704 "char encoding EUC-JPnot supported\n");
1705 break;
1706 }
1707 }
1708 if (handler == NULL)
1709 return(-1);
1710 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1711 return(xmlSwitchToEncoding(ctxt, handler));
1712}
1713
1714/**
1715 * xmlSwitchToEncoding:
1716 * @ctxt: the parser context
1717 * @handler: the encoding handler
1718 *
1719 * change the input functions when discovering the character encoding
1720 * of a given entity.
1721 *
1722 * Returns 0 in case of success, -1 otherwise
1723 */
1724int
1725xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1726{
1727 int nbchars;
1728
1729 if (handler != NULL) {
1730 if (ctxt->input != NULL) {
1731 if (ctxt->input->buf != NULL) {
1732 if (ctxt->input->buf->encoder != NULL) {
1733 if (ctxt->input->buf->encoder == handler)
1734 return(0);
1735 /*
1736 * Note: this is a bit dangerous, but that's what it
1737 * takes to use nearly compatible signature for different
1738 * encodings.
1739 */
1740 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1741 ctxt->input->buf->encoder = handler;
1742 return(0);
1743 }
1744 ctxt->input->buf->encoder = handler;
1745
1746 /*
1747 * Is there already some content down the pipe to convert ?
1748 */
1749 if ((ctxt->input->buf->buffer != NULL) &&
1750 (ctxt->input->buf->buffer->use > 0)) {
1751 int processed;
1752
1753 /*
1754 * Specific handling of the Byte Order Mark for
1755 * UTF-16
1756 */
1757 if ((handler->name != NULL) &&
1758 (!strcmp(handler->name, "UTF-16LE")) &&
1759 (ctxt->input->cur[0] == 0xFF) &&
1760 (ctxt->input->cur[1] == 0xFE)) {
1761 ctxt->input->cur += 2;
1762 }
1763 if ((handler->name != NULL) &&
1764 (!strcmp(handler->name, "UTF-16BE")) &&
1765 (ctxt->input->cur[0] == 0xFE) &&
1766 (ctxt->input->cur[1] == 0xFF)) {
1767 ctxt->input->cur += 2;
1768 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001769 /*
1770 * Errata on XML-1.0 June 20 2001
1771 * Specific handling of the Byte Order Mark for
1772 * UTF-8
1773 */
1774 if ((handler->name != NULL) &&
1775 (!strcmp(handler->name, "UTF-8")) &&
1776 (ctxt->input->cur[0] == 0xEF) &&
1777 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001778 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001779 ctxt->input->cur += 3;
1780 }
Owen Taylor3473f882001-02-23 17:55:21 +00001781
1782 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001783 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001784 * Move it as the raw buffer and create a new input buffer
1785 */
1786 processed = ctxt->input->cur - ctxt->input->base;
1787 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1788 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1789 ctxt->input->buf->buffer = xmlBufferCreate();
1790
1791 if (ctxt->html) {
1792 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001793 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001794 */
1795 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1796 ctxt->input->buf->buffer,
1797 ctxt->input->buf->raw);
1798 } else {
1799 /*
1800 * convert just enough to get
1801 * '<?xml version="1.0" encoding="xxx"?>'
1802 * parsed with the autodetected encoding
1803 * into the parser reading buffer.
1804 */
1805 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1806 ctxt->input->buf->buffer,
1807 ctxt->input->buf->raw);
1808 }
1809 if (nbchars < 0) {
1810 xmlGenericError(xmlGenericErrorContext,
1811 "xmlSwitchToEncoding: encoder error\n");
1812 return(-1);
1813 }
1814 ctxt->input->base =
1815 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001816 ctxt->input->end =
1817 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001818
1819 }
1820 return(0);
1821 } else {
1822 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1823 /*
1824 * When parsing a static memory array one must know the
1825 * size to be able to convert the buffer.
1826 */
1827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1828 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001829 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001830 return(-1);
1831 } else {
1832 int processed;
1833
1834 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001835 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001836 * Move it as the raw buffer and create a new input buffer
1837 */
1838 processed = ctxt->input->cur - ctxt->input->base;
1839
1840 ctxt->input->buf->raw = xmlBufferCreate();
1841 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1842 ctxt->input->length - processed);
1843 ctxt->input->buf->buffer = xmlBufferCreate();
1844
1845 /*
1846 * convert as much as possible of the raw input
1847 * to the parser reading buffer.
1848 */
1849 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1850 ctxt->input->buf->buffer,
1851 ctxt->input->buf->raw);
1852 if (nbchars < 0) {
1853 xmlGenericError(xmlGenericErrorContext,
1854 "xmlSwitchToEncoding: encoder error\n");
1855 return(-1);
1856 }
1857
1858 /*
1859 * Conversion succeeded, get rid of the old buffer
1860 */
1861 if ((ctxt->input->free != NULL) &&
1862 (ctxt->input->base != NULL))
1863 ctxt->input->free((xmlChar *) ctxt->input->base);
1864 ctxt->input->base =
1865 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001866 ctxt->input->end =
1867 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001868 }
1869 }
1870 } else {
1871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1872 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001873 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(-1);
1875 }
1876 /*
1877 * The parsing is now done in UTF8 natively
1878 */
1879 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1880 } else
1881 return(-1);
1882 return(0);
1883
1884}
1885
1886/************************************************************************
1887 * *
1888 * Commodity functions to handle entities processing *
1889 * *
1890 ************************************************************************/
1891
1892/**
1893 * xmlFreeInputStream:
1894 * @input: an xmlParserInputPtr
1895 *
1896 * Free up an input stream.
1897 */
1898void
1899xmlFreeInputStream(xmlParserInputPtr input) {
1900 if (input == NULL) return;
1901
1902 if (input->filename != NULL) xmlFree((char *) input->filename);
1903 if (input->directory != NULL) xmlFree((char *) input->directory);
1904 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1905 if (input->version != NULL) xmlFree((char *) input->version);
1906 if ((input->free != NULL) && (input->base != NULL))
1907 input->free((xmlChar *) input->base);
1908 if (input->buf != NULL)
1909 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001910 xmlFree(input);
1911}
1912
1913/**
1914 * xmlNewInputStream:
1915 * @ctxt: an XML parser context
1916 *
1917 * Create a new input stream structure
1918 * Returns the new input stream or NULL
1919 */
1920xmlParserInputPtr
1921xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1922 xmlParserInputPtr input;
1923
1924 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1925 if (input == NULL) {
1926 if (ctxt != NULL) {
1927 ctxt->errNo = XML_ERR_NO_MEMORY;
1928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1929 ctxt->sax->error(ctxt->userData,
1930 "malloc: couldn't allocate a new input stream\n");
1931 ctxt->errNo = XML_ERR_NO_MEMORY;
1932 }
1933 return(NULL);
1934 }
1935 memset(input, 0, sizeof(xmlParserInput));
1936 input->line = 1;
1937 input->col = 1;
1938 input->standalone = -1;
1939 return(input);
1940}
1941
1942/**
1943 * xmlNewIOInputStream:
1944 * @ctxt: an XML parser context
1945 * @input: an I/O Input
1946 * @enc: the charset encoding if known
1947 *
1948 * Create a new input stream structure encapsulating the @input into
1949 * a stream suitable for the parser.
1950 *
1951 * Returns the new input stream or NULL
1952 */
1953xmlParserInputPtr
1954xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1955 xmlCharEncoding enc) {
1956 xmlParserInputPtr inputStream;
1957
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1960 inputStream = xmlNewInputStream(ctxt);
1961 if (inputStream == NULL) {
1962 return(NULL);
1963 }
1964 inputStream->filename = NULL;
1965 inputStream->buf = input;
1966 inputStream->base = inputStream->buf->buffer->content;
1967 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001968 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001969 if (enc != XML_CHAR_ENCODING_NONE) {
1970 xmlSwitchEncoding(ctxt, enc);
1971 }
1972
1973 return(inputStream);
1974}
1975
1976/**
1977 * xmlNewEntityInputStream:
1978 * @ctxt: an XML parser context
1979 * @entity: an Entity pointer
1980 *
1981 * Create a new input stream based on an xmlEntityPtr
1982 *
1983 * Returns the new input stream or NULL
1984 */
1985xmlParserInputPtr
1986xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1987 xmlParserInputPtr input;
1988
1989 if (entity == NULL) {
1990 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1992 ctxt->sax->error(ctxt->userData,
1993 "internal: xmlNewEntityInputStream entity = NULL\n");
1994 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1995 return(NULL);
1996 }
1997 if (xmlParserDebugEntities)
1998 xmlGenericError(xmlGenericErrorContext,
1999 "new input from entity: %s\n", entity->name);
2000 if (entity->content == NULL) {
2001 switch (entity->etype) {
2002 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2003 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "xmlNewEntityInputStream unparsed entity !\n");
2007 break;
2008 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2009 case XML_EXTERNAL_PARAMETER_ENTITY:
2010 return(xmlLoadExternalEntity((char *) entity->URI,
2011 (char *) entity->ExternalID, ctxt));
2012 case XML_INTERNAL_GENERAL_ENTITY:
2013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2014 ctxt->sax->error(ctxt->userData,
2015 "Internal entity %s without content !\n", entity->name);
2016 break;
2017 case XML_INTERNAL_PARAMETER_ENTITY:
2018 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "Internal parameter entity %s without content !\n", entity->name);
2022 break;
2023 case XML_INTERNAL_PREDEFINED_ENTITY:
2024 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2026 ctxt->sax->error(ctxt->userData,
2027 "Predefined entity %s without content !\n", entity->name);
2028 break;
2029 }
2030 return(NULL);
2031 }
2032 input = xmlNewInputStream(ctxt);
2033 if (input == NULL) {
2034 return(NULL);
2035 }
2036 input->filename = (char *) entity->URI;
2037 input->base = entity->content;
2038 input->cur = entity->content;
2039 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002040 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002041 return(input);
2042}
2043
2044/**
2045 * xmlNewStringInputStream:
2046 * @ctxt: an XML parser context
2047 * @buffer: an memory buffer
2048 *
2049 * Create a new input stream based on a memory buffer.
2050 * Returns the new input stream
2051 */
2052xmlParserInputPtr
2053xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2054 xmlParserInputPtr input;
2055
2056 if (buffer == NULL) {
2057 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2059 ctxt->sax->error(ctxt->userData,
2060 "internal: xmlNewStringInputStream string = NULL\n");
2061 return(NULL);
2062 }
2063 if (xmlParserDebugEntities)
2064 xmlGenericError(xmlGenericErrorContext,
2065 "new fixed input: %.30s\n", buffer);
2066 input = xmlNewInputStream(ctxt);
2067 if (input == NULL) {
2068 return(NULL);
2069 }
2070 input->base = buffer;
2071 input->cur = buffer;
2072 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002073 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002074 return(input);
2075}
2076
2077/**
2078 * xmlNewInputFromFile:
2079 * @ctxt: an XML parser context
2080 * @filename: the filename to use as entity
2081 *
2082 * Create a new input stream based on a file.
2083 *
2084 * Returns the new input stream or NULL in case of error
2085 */
2086xmlParserInputPtr
2087xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2088 xmlParserInputBufferPtr buf;
2089 xmlParserInputPtr inputStream;
2090 char *directory = NULL;
2091 xmlChar *URI = NULL;
2092
2093 if (xmlParserDebugEntities)
2094 xmlGenericError(xmlGenericErrorContext,
2095 "new input from file: %s\n", filename);
2096 if (ctxt == NULL) return(NULL);
2097 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2098 if (buf == NULL)
2099 return(NULL);
2100
2101 URI = xmlStrdup((xmlChar *) filename);
2102 directory = xmlParserGetDirectory((const char *) URI);
2103
2104 inputStream = xmlNewInputStream(ctxt);
2105 if (inputStream == NULL) {
2106 if (directory != NULL) xmlFree((char *) directory);
2107 if (URI != NULL) xmlFree((char *) URI);
2108 return(NULL);
2109 }
2110
2111 inputStream->filename = (const char *) URI;
2112 inputStream->directory = directory;
2113 inputStream->buf = buf;
2114
2115 inputStream->base = inputStream->buf->buffer->content;
2116 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002117 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002118 if ((ctxt->directory == NULL) && (directory != NULL))
2119 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2120 return(inputStream);
2121}
2122
2123/************************************************************************
2124 * *
2125 * Commodity functions to handle parser contexts *
2126 * *
2127 ************************************************************************/
2128
2129/**
2130 * xmlInitParserCtxt:
2131 * @ctxt: an XML parser context
2132 *
2133 * Initialize a parser context
2134 */
2135
2136void
2137xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2138{
2139 xmlSAXHandler *sax;
2140
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002141 if(ctxt==NULL) {
2142 xmlGenericError(xmlGenericErrorContext,
2143 "xmlInitParserCtxt: NULL context given\n");
2144 return;
2145 }
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 xmlDefaultSAXHandlerInit();
2148
2149 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2150 if (sax == NULL) {
2151 xmlGenericError(xmlGenericErrorContext,
2152 "xmlInitParserCtxt: out of memory\n");
2153 }
2154 else
2155 memset(sax, 0, sizeof(xmlSAXHandler));
2156
2157 /* Allocate the Input stack */
2158 ctxt->inputTab = (xmlParserInputPtr *)
2159 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2160 if (ctxt->inputTab == NULL) {
2161 xmlGenericError(xmlGenericErrorContext,
2162 "xmlInitParserCtxt: out of memory\n");
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 0;
2165 ctxt->input = NULL;
2166 return;
2167 }
2168 ctxt->inputNr = 0;
2169 ctxt->inputMax = 5;
2170 ctxt->input = NULL;
2171
2172 ctxt->version = NULL;
2173 ctxt->encoding = NULL;
2174 ctxt->standalone = -1;
2175 ctxt->hasExternalSubset = 0;
2176 ctxt->hasPErefs = 0;
2177 ctxt->html = 0;
2178 ctxt->external = 0;
2179 ctxt->instate = XML_PARSER_START;
2180 ctxt->token = 0;
2181 ctxt->directory = NULL;
2182
2183 /* Allocate the Node stack */
2184 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2185 if (ctxt->nodeTab == NULL) {
2186 xmlGenericError(xmlGenericErrorContext,
2187 "xmlInitParserCtxt: out of memory\n");
2188 ctxt->nodeNr = 0;
2189 ctxt->nodeMax = 0;
2190 ctxt->node = NULL;
2191 ctxt->inputNr = 0;
2192 ctxt->inputMax = 0;
2193 ctxt->input = NULL;
2194 return;
2195 }
2196 ctxt->nodeNr = 0;
2197 ctxt->nodeMax = 10;
2198 ctxt->node = NULL;
2199
2200 /* Allocate the Name stack */
2201 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2202 if (ctxt->nameTab == NULL) {
2203 xmlGenericError(xmlGenericErrorContext,
2204 "xmlInitParserCtxt: out of memory\n");
2205 ctxt->nodeNr = 0;
2206 ctxt->nodeMax = 0;
2207 ctxt->node = NULL;
2208 ctxt->inputNr = 0;
2209 ctxt->inputMax = 0;
2210 ctxt->input = NULL;
2211 ctxt->nameNr = 0;
2212 ctxt->nameMax = 0;
2213 ctxt->name = NULL;
2214 return;
2215 }
2216 ctxt->nameNr = 0;
2217 ctxt->nameMax = 10;
2218 ctxt->name = NULL;
2219
2220 /* Allocate the space stack */
2221 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2222 if (ctxt->spaceTab == NULL) {
2223 xmlGenericError(xmlGenericErrorContext,
2224 "xmlInitParserCtxt: out of memory\n");
2225 ctxt->nodeNr = 0;
2226 ctxt->nodeMax = 0;
2227 ctxt->node = NULL;
2228 ctxt->inputNr = 0;
2229 ctxt->inputMax = 0;
2230 ctxt->input = NULL;
2231 ctxt->nameNr = 0;
2232 ctxt->nameMax = 0;
2233 ctxt->name = NULL;
2234 ctxt->spaceNr = 0;
2235 ctxt->spaceMax = 0;
2236 ctxt->space = NULL;
2237 return;
2238 }
2239 ctxt->spaceNr = 1;
2240 ctxt->spaceMax = 10;
2241 ctxt->spaceTab[0] = -1;
2242 ctxt->space = &ctxt->spaceTab[0];
2243
Daniel Veillard14be0a12001-03-03 18:50:55 +00002244 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002245 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002246
Owen Taylor3473f882001-02-23 17:55:21 +00002247 ctxt->userData = ctxt;
2248 ctxt->myDoc = NULL;
2249 ctxt->wellFormed = 1;
2250 ctxt->valid = 1;
2251 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2252 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2253 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002254 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002255 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002256 if (ctxt->keepBlanks == 0)
2257 sax->ignorableWhitespace = ignorableWhitespace;
2258
Owen Taylor3473f882001-02-23 17:55:21 +00002259 ctxt->vctxt.userData = ctxt;
2260 if (ctxt->validate) {
2261 ctxt->vctxt.error = xmlParserValidityError;
2262 if (xmlGetWarningsDefaultValue == 0)
2263 ctxt->vctxt.warning = NULL;
2264 else
2265 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002266 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 } else {
2268 ctxt->vctxt.error = NULL;
2269 ctxt->vctxt.warning = NULL;
2270 }
2271 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2272 ctxt->record_info = 0;
2273 ctxt->nbChars = 0;
2274 ctxt->checkIndex = 0;
2275 ctxt->inSubset = 0;
2276 ctxt->errNo = XML_ERR_OK;
2277 ctxt->depth = 0;
2278 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002279 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002280 xmlInitNodeInfoSeq(&ctxt->node_seq);
2281}
2282
2283/**
2284 * xmlFreeParserCtxt:
2285 * @ctxt: an XML parser context
2286 *
2287 * Free all the memory used by a parser context. However the parsed
2288 * document in ctxt->myDoc is not freed.
2289 */
2290
2291void
2292xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2293{
2294 xmlParserInputPtr input;
2295 xmlChar *oldname;
2296
2297 if (ctxt == NULL) return;
2298
2299 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2300 xmlFreeInputStream(input);
2301 }
2302 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2303 xmlFree(oldname);
2304 }
2305 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2306 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2307 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2308 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2309 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2310 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2311 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2312 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2313 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002314 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2315 xmlFree(ctxt->sax);
2316 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002317 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002318#ifdef LIBXML_CATALOG_ENABLED
2319 if (ctxt->catalogs != NULL)
2320 xmlCatalogFreeLocal(ctxt->catalogs);
2321#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002322 xmlFree(ctxt);
2323}
2324
2325/**
2326 * xmlNewParserCtxt:
2327 *
2328 * Allocate and initialize a new parser context.
2329 *
2330 * Returns the xmlParserCtxtPtr or NULL
2331 */
2332
2333xmlParserCtxtPtr
2334xmlNewParserCtxt()
2335{
2336 xmlParserCtxtPtr ctxt;
2337
2338 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2339 if (ctxt == NULL) {
2340 xmlGenericError(xmlGenericErrorContext,
2341 "xmlNewParserCtxt : cannot allocate context\n");
2342 perror("malloc");
2343 return(NULL);
2344 }
2345 memset(ctxt, 0, sizeof(xmlParserCtxt));
2346 xmlInitParserCtxt(ctxt);
2347 return(ctxt);
2348}
2349
2350/************************************************************************
2351 * *
 *		Handling of node information				*
2353 * *
2354 ************************************************************************/
2355
2356/**
2357 * xmlClearParserCtxt:
2358 * @ctxt: an XML parser context
2359 *
2360 * Clear (release owned resources) and reinitialize a parser context
2361 */
2362
2363void
2364xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2365{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002366 if (ctxt==NULL)
2367 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 xmlClearNodeInfoSeq(&ctxt->node_seq);
2369 xmlInitParserCtxt(ctxt);
2370}
2371
2372/**
2373 * xmlParserFindNodeInfo:
2374 * @ctxt: an XML parser context
2375 * @node: an XML node within the tree
2376 *
2377 * Find the parser node info struct for a given node
2378 *
2379 * Returns an xmlParserNodeInfo block pointer or NULL
2380 */
2381const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2382 const xmlNode* node)
2383{
2384 unsigned long pos;
2385
2386 /* Find position where node should be at */
2387 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002388 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002389 return &ctx->node_seq.buffer[pos];
2390 else
2391 return NULL;
2392}
2393
2394
2395/**
2396 * xmlInitNodeInfoSeq:
2397 * @seq: a node info sequence pointer
2398 *
2399 * -- Initialize (set to initial state) node info sequence
2400 */
2401void
2402xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2403{
2404 seq->length = 0;
2405 seq->maximum = 0;
2406 seq->buffer = NULL;
2407}
2408
2409/**
2410 * xmlClearNodeInfoSeq:
2411 * @seq: a node info sequence pointer
2412 *
2413 * -- Clear (release memory and reinitialize) node
2414 * info sequence
2415 */
2416void
2417xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2418{
2419 if ( seq->buffer != NULL )
2420 xmlFree(seq->buffer);
2421 xmlInitNodeInfoSeq(seq);
2422}
2423
2424
2425/**
2426 * xmlParserFindNodeInfoIndex:
2427 * @seq: a node info sequence pointer
2428 * @node: an XML node pointer
2429 *
2430 *
2431 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2432 * the given node is or should be at in a sorted sequence
2433 *
2434 * Returns a long indicating the position of the record
2435 */
2436unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2437 const xmlNode* node)
2438{
2439 unsigned long upper, lower, middle;
2440 int found = 0;
2441
2442 /* Do a binary search for the key */
2443 lower = 1;
2444 upper = seq->length;
2445 middle = 0;
2446 while ( lower <= upper && !found) {
2447 middle = lower + (upper - lower) / 2;
2448 if ( node == seq->buffer[middle - 1].node )
2449 found = 1;
2450 else if ( node < seq->buffer[middle - 1].node )
2451 upper = middle - 1;
2452 else
2453 lower = middle + 1;
2454 }
2455
2456 /* Return position */
2457 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2458 return middle;
2459 else
2460 return middle - 1;
2461}
2462
2463
2464/**
2465 * xmlParserAddNodeInfo:
2466 * @ctxt: an XML parser context
2467 * @info: a node info sequence pointer
2468 *
2469 * Insert node info record into the sorted sequence
2470 */
2471void
2472xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2473 const xmlParserNodeInfo* info)
2474{
2475 unsigned long pos;
2476 static unsigned int block_size = 5;
2477
2478 /* Find pos and check to see if node is already in the sequence */
2479 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2480 if ( pos < ctxt->node_seq.length
2481 && ctxt->node_seq.buffer[pos].node == info->node ) {
2482 ctxt->node_seq.buffer[pos] = *info;
2483 }
2484
2485 /* Otherwise, we need to add new node to buffer */
2486 else {
2487 /* Expand buffer by 5 if needed */
2488 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2489 xmlParserNodeInfo* tmp_buffer;
2490 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2491 *(ctxt->node_seq.maximum + block_size));
2492
2493 if ( ctxt->node_seq.buffer == NULL )
2494 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2495 else
2496 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2497
2498 if ( tmp_buffer == NULL ) {
2499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2500 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2501 ctxt->errNo = XML_ERR_NO_MEMORY;
2502 return;
2503 }
2504 ctxt->node_seq.buffer = tmp_buffer;
2505 ctxt->node_seq.maximum += block_size;
2506 }
2507
2508 /* If position is not at end, move elements out of the way */
2509 if ( pos != ctxt->node_seq.length ) {
2510 unsigned long i;
2511
2512 for ( i = ctxt->node_seq.length; i > pos; i-- )
2513 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2514 }
2515
2516 /* Copy element and increase length */
2517 ctxt->node_seq.buffer[pos] = *info;
2518 ctxt->node_seq.length++;
2519 }
2520}
2521
2522/************************************************************************
2523 * *
 *		Default settings					*
2525 * *
2526 ************************************************************************/
2527/**
2528 * xmlPedanticParserDefault:
2529 * @val: int 0 or 1
2530 *
2531 * Set and return the previous value for enabling pedantic warnings.
2532 *
2533 * Returns the last value for 0 for no substitution, 1 for substitution.
2534 */
2535
2536int
2537xmlPedanticParserDefault(int val) {
2538 int old = xmlPedanticParserDefaultValue;
2539
2540 xmlPedanticParserDefaultValue = val;
2541 return(old);
2542}
2543
2544/**
2545 * xmlLineNumbersDefault:
2546 * @val: int 0 or 1
2547 *
2548 * Set and return the previous value for enabling line numbers in elements
2549 * contents. This may break on old application and is turned off by default.
2550 *
2551 * Returns the last value for 0 for no substitution, 1 for substitution.
2552 */
2553
2554int
2555xmlLineNumbersDefault(int val) {
2556 int old = xmlLineNumbersDefaultValue;
2557
2558 xmlLineNumbersDefaultValue = val;
2559 return(old);
2560}
2561
2562/**
2563 * xmlSubstituteEntitiesDefault:
2564 * @val: int 0 or 1
2565 *
2566 * Set and return the previous value for default entity support.
2567 * Initially the parser always keep entity references instead of substituting
2568 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002569 * default parser behavior
2570 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002571 * file basis.
2572 *
2573 * Returns the last value for 0 for no substitution, 1 for substitution.
2574 */
2575
2576int
2577xmlSubstituteEntitiesDefault(int val) {
2578 int old = xmlSubstituteEntitiesDefaultValue;
2579
2580 xmlSubstituteEntitiesDefaultValue = val;
2581 return(old);
2582}
2583
2584/**
2585 * xmlKeepBlanksDefault:
2586 * @val: int 0 or 1
2587 *
2588 * Set and return the previous value for default blanks text nodes support.
2589 * The 1.x version of the parser used an heuristic to try to detect
2590 * ignorable white spaces. As a result the SAX callback was generating
2591 * ignorableWhitespace() callbacks instead of characters() one, and when
2592 * using the DOM output text nodes containing those blanks were not generated.
2593 * The 2.x and later version will switch to the XML standard way and
2594 * ignorableWhitespace() are only generated when running the parser in
2595 * validating mode and when the current element doesn't allow CDATA or
2596 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002597 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002598 * on 1.X libs and to switch back to the old mode for compatibility when
2599 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2600 * by using xmlIsBlankNode() commodity function to detect the "empty"
2601 * nodes generated.
2602 * This value also affect autogeneration of indentation when saving code
2603 * if blanks sections are kept, indentation is not generated.
2604 *
2605 * Returns the last value for 0 for no substitution, 1 for substitution.
2606 */
2607
2608int
2609xmlKeepBlanksDefault(int val) {
2610 int old = xmlKeepBlanksDefaultValue;
2611
2612 xmlKeepBlanksDefaultValue = val;
2613 xmlIndentTreeOutput = !val;
2614 return(old);
2615}
2616
2617/************************************************************************
2618 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002619 * Deprecated functions kept for compatibility *
2620 * *
2621 ************************************************************************/
2622
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002623/**
2624 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002625 * @lang: pointer to the string value
2626 *
2627 * Checks that the value conforms to the LanguageID production:
2628 *
2629 * NOTE: this is somewhat deprecated, those productions were removed from
2630 * the XML Second edition.
2631 *
2632 * [33] LanguageID ::= Langcode ('-' Subcode)*
2633 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2634 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2635 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2636 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2637 * [38] Subcode ::= ([a-z] | [A-Z])+
2638 *
2639 * Returns 1 if correct 0 otherwise
2640 **/
2641int
2642xmlCheckLanguageID(const xmlChar *lang) {
2643 const xmlChar *cur = lang;
2644
2645 if (cur == NULL)
2646 return(0);
2647 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2648 ((cur[0] == 'I') && (cur[1] == '-'))) {
2649 /*
2650 * IANA code
2651 */
2652 cur += 2;
2653 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2654 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2655 cur++;
2656 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2657 ((cur[0] == 'X') && (cur[1] == '-'))) {
2658 /*
2659 * User code
2660 */
2661 cur += 2;
2662 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2663 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2664 cur++;
2665 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2666 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2667 /*
2668 * ISO639
2669 */
2670 cur++;
2671 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2672 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2673 cur++;
2674 else
2675 return(0);
2676 } else
2677 return(0);
2678 while (cur[0] != 0) { /* non input consuming */
2679 if (cur[0] != '-')
2680 return(0);
2681 cur++;
2682 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2683 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2684 cur++;
2685 else
2686 return(0);
2687 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2688 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2689 cur++;
2690 }
2691 return(1);
2692}
2693
2694/**
2695 * xmlDecodeEntities:
2696 * @ctxt: the parser context
2697 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2698 * @len: the len to decode (in bytes !), -1 for no size limit
2699 * @end: an end marker xmlChar, 0 if none
2700 * @end2: an end marker xmlChar, 0 if none
2701 * @end3: an end marker xmlChar, 0 if none
2702 *
2703 * This function is deprecated, we now always process entities content
2704 * through xmlStringDecodeEntities
2705 *
2706 * TODO: remove it in next major release.
2707 *
2708 * [67] Reference ::= EntityRef | CharRef
2709 *
2710 * [69] PEReference ::= '%' Name ';'
2711 *
2712 * Returns A newly allocated string with the substitution done. The caller
2713 * must deallocate it !
2714 */
2715xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002716xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2717 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002718#if 0
2719 xmlChar *buffer = NULL;
2720 unsigned int buffer_size = 0;
2721 unsigned int nbchars = 0;
2722
2723 xmlChar *current = NULL;
2724 xmlEntityPtr ent;
2725 unsigned int max = (unsigned int) len;
2726 int c,l;
2727#endif
2728
2729 static int deprecated = 0;
2730 if (!deprecated) {
2731 xmlGenericError(xmlGenericErrorContext,
2732 "xmlDecodeEntities() deprecated function reached\n");
2733 deprecated = 1;
2734 }
2735
2736#if 0
2737 if (ctxt->depth > 40) {
2738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2739 ctxt->sax->error(ctxt->userData,
2740 "Detected entity reference loop\n");
2741 ctxt->wellFormed = 0;
2742 ctxt->disableSAX = 1;
2743 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2744 return(NULL);
2745 }
2746
2747 /*
2748 * allocate a translation buffer.
2749 */
2750 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2751 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2752 if (buffer == NULL) {
2753 perror("xmlDecodeEntities: malloc failed");
2754 return(NULL);
2755 }
2756
2757 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002758 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002759 */
2760 GROW;
2761 c = CUR_CHAR(l);
2762 while ((nbchars < max) && (c != end) && /* NOTUSED */
2763 (c != end2) && (c != end3)) {
2764 GROW;
2765 if (c == 0) break;
2766 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2767 int val = xmlParseCharRef(ctxt);
2768 COPY_BUF(0,buffer,nbchars,val);
2769 NEXTL(l);
2770 } else if ((c == '&') && (ctxt->token != '&') &&
2771 (what & XML_SUBSTITUTE_REF)) {
2772 if (xmlParserDebugEntities)
2773 xmlGenericError(xmlGenericErrorContext,
2774 "decoding Entity Reference\n");
2775 ent = xmlParseEntityRef(ctxt);
2776 if ((ent != NULL) &&
2777 (ctxt->replaceEntities != 0)) {
2778 current = ent->content;
2779 while (*current != 0) { /* non input consuming loop */
2780 buffer[nbchars++] = *current++;
2781 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2782 growBuffer(buffer);
2783 }
2784 }
2785 } else if (ent != NULL) {
2786 const xmlChar *cur = ent->name;
2787
2788 buffer[nbchars++] = '&';
2789 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2790 growBuffer(buffer);
2791 }
2792 while (*cur != 0) { /* non input consuming loop */
2793 buffer[nbchars++] = *cur++;
2794 }
2795 buffer[nbchars++] = ';';
2796 }
2797 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2798 /*
2799 * a PEReference induce to switch the entity flow,
2800 * we break here to flush the current set of chars
2801 * parsed if any. We will be called back later.
2802 */
2803 if (xmlParserDebugEntities)
2804 xmlGenericError(xmlGenericErrorContext,
2805 "decoding PE Reference\n");
2806 if (nbchars != 0) break;
2807
2808 xmlParsePEReference(ctxt);
2809
2810 /*
2811 * Pop-up of finished entities.
2812 */
2813 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2814 xmlPopInput(ctxt);
2815
2816 break;
2817 } else {
2818 COPY_BUF(l,buffer,nbchars,c);
2819 NEXTL(l);
2820 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2821 growBuffer(buffer);
2822 }
2823 }
2824 c = CUR_CHAR(l);
2825 }
2826 buffer[nbchars++] = 0;
2827 return(buffer);
2828#endif
2829 return(NULL);
2830}
2831
2832/**
2833 * xmlNamespaceParseNCName:
2834 * @ctxt: an XML parser context
2835 *
2836 * parse an XML namespace name.
2837 *
2838 * TODO: this seems not in use anymore, the namespace handling is done on
2839 * top of the SAX interfaces, i.e. not on raw input.
2840 *
2841 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2842 *
2843 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2844 * CombiningChar | Extender
2845 *
2846 * Returns the namespace name or NULL
2847 */
2848
2849xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002850xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002851#if 0
2852 xmlChar buf[XML_MAX_NAMELEN + 5];
2853 int len = 0, l;
2854 int cur = CUR_CHAR(l);
2855#endif
2856
2857 static int deprecated = 0;
2858 if (!deprecated) {
2859 xmlGenericError(xmlGenericErrorContext,
2860 "xmlNamespaceParseNCName() deprecated function reached\n");
2861 deprecated = 1;
2862 }
2863
2864#if 0
2865 /* load first the value of the char !!! */
2866 GROW;
2867 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2868
2869xmlGenericError(xmlGenericErrorContext,
2870 "xmlNamespaceParseNCName: reached loop 3\n");
2871 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2872 (cur == '.') || (cur == '-') ||
2873 (cur == '_') ||
2874 (IS_COMBINING(cur)) ||
2875 (IS_EXTENDER(cur))) {
2876 COPY_BUF(l,buf,len,cur);
2877 NEXTL(l);
2878 cur = CUR_CHAR(l);
2879 if (len >= XML_MAX_NAMELEN) {
2880 xmlGenericError(xmlGenericErrorContext,
2881 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2882 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2883 (cur == '.') || (cur == '-') ||
2884 (cur == '_') ||
2885 (IS_COMBINING(cur)) ||
2886 (IS_EXTENDER(cur))) {
2887 NEXTL(l);
2888 cur = CUR_CHAR(l);
2889 }
2890 break;
2891 }
2892 }
2893 return(xmlStrndup(buf, len));
2894#endif
2895 return(NULL);
2896}
2897
2898/**
2899 * xmlNamespaceParseQName:
2900 * @ctxt: an XML parser context
2901 * @prefix: a xmlChar **
2902 *
2903 * TODO: this seems not in use anymore, the namespace handling is done on
2904 * top of the SAX interfaces, i.e. not on raw input.
2905 *
2906 * parse an XML qualified name
2907 *
2908 * [NS 5] QName ::= (Prefix ':')? LocalPart
2909 *
2910 * [NS 6] Prefix ::= NCName
2911 *
2912 * [NS 7] LocalPart ::= NCName
2913 *
2914 * Returns the local part, and prefix is updated
2915 * to get the Prefix if any.
2916 */
2917
2918xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002919xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002920
2921 static int deprecated = 0;
2922 if (!deprecated) {
2923 xmlGenericError(xmlGenericErrorContext,
2924 "xmlNamespaceParseQName() deprecated function reached\n");
2925 deprecated = 1;
2926 }
2927
2928#if 0
2929 xmlChar *ret = NULL;
2930
2931 *prefix = NULL;
2932 ret = xmlNamespaceParseNCName(ctxt);
2933 if (RAW == ':') {
2934 *prefix = ret;
2935 NEXT;
2936 ret = xmlNamespaceParseNCName(ctxt);
2937 }
2938
2939 return(ret);
2940#endif
2941 return(NULL);
2942}
2943
2944/**
2945 * xmlNamespaceParseNSDef:
2946 * @ctxt: an XML parser context
2947 *
2948 * parse a namespace prefix declaration
2949 *
2950 * TODO: this seems not in use anymore, the namespace handling is done on
2951 * top of the SAX interfaces, i.e. not on raw input.
2952 *
2953 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2954 *
2955 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2956 *
2957 * Returns the namespace name
2958 */
2959
2960xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002961xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002962 static int deprecated = 0;
2963 if (!deprecated) {
2964 xmlGenericError(xmlGenericErrorContext,
2965 "xmlNamespaceParseNSDef() deprecated function reached\n");
2966 deprecated = 1;
2967 }
2968 return(NULL);
2969#if 0
2970 xmlChar *name = NULL;
2971
2972 if ((RAW == 'x') && (NXT(1) == 'm') &&
2973 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2974 (NXT(4) == 's')) {
2975 SKIP(5);
2976 if (RAW == ':') {
2977 NEXT;
2978 name = xmlNamespaceParseNCName(ctxt);
2979 }
2980 }
2981 return(name);
2982#endif
2983}
2984
2985/**
2986 * xmlParseQuotedString:
2987 * @ctxt: an XML parser context
2988 *
2989 * Parse and return a string between quotes or doublequotes
2990 *
2991 * TODO: Deprecated, to be removed at next drop of binary compatibility
2992 *
2993 * Returns the string parser or NULL.
2994 */
2995xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002996xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002997 static int deprecated = 0;
2998 if (!deprecated) {
2999 xmlGenericError(xmlGenericErrorContext,
3000 "xmlParseQuotedString() deprecated function reached\n");
3001 deprecated = 1;
3002 }
3003 return(NULL);
3004
3005#if 0
3006 xmlChar *buf = NULL;
3007 int len = 0,l;
3008 int size = XML_PARSER_BUFFER_SIZE;
3009 int c;
3010
3011 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3012 if (buf == NULL) {
3013 xmlGenericError(xmlGenericErrorContext,
3014 "malloc of %d byte failed\n", size);
3015 return(NULL);
3016 }
3017xmlGenericError(xmlGenericErrorContext,
3018 "xmlParseQuotedString: reached loop 4\n");
3019 if (RAW == '"') {
3020 NEXT;
3021 c = CUR_CHAR(l);
3022 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3023 if (len + 5 >= size) {
3024 size *= 2;
3025 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3026 if (buf == NULL) {
3027 xmlGenericError(xmlGenericErrorContext,
3028 "realloc of %d byte failed\n", size);
3029 return(NULL);
3030 }
3031 }
3032 COPY_BUF(l,buf,len,c);
3033 NEXTL(l);
3034 c = CUR_CHAR(l);
3035 }
3036 if (c != '"') {
3037 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3039 ctxt->sax->error(ctxt->userData,
3040 "String not closed \"%.50s\"\n", buf);
3041 ctxt->wellFormed = 0;
3042 ctxt->disableSAX = 1;
3043 } else {
3044 NEXT;
3045 }
3046 } else if (RAW == '\''){
3047 NEXT;
3048 c = CUR;
3049 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3050 if (len + 1 >= size) {
3051 size *= 2;
3052 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3053 if (buf == NULL) {
3054 xmlGenericError(xmlGenericErrorContext,
3055 "realloc of %d byte failed\n", size);
3056 return(NULL);
3057 }
3058 }
3059 buf[len++] = c;
3060 NEXT;
3061 c = CUR;
3062 }
3063 if (RAW != '\'') {
3064 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3066 ctxt->sax->error(ctxt->userData,
3067 "String not closed \"%.50s\"\n", buf);
3068 ctxt->wellFormed = 0;
3069 ctxt->disableSAX = 1;
3070 } else {
3071 NEXT;
3072 }
3073 }
3074 return(buf);
3075#endif
3076}
3077
3078/**
3079 * xmlParseNamespace:
3080 * @ctxt: an XML parser context
3081 *
3082 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3083 *
3084 * This is what the older xml-name Working Draft specified, a bunch of
3085 * other stuff may still rely on it, so support is still here as
3086 * if it was declared on the root of the Tree:-(
3087 *
3088 * TODO: remove from library
3089 *
3090 * To be removed at next drop of binary compatibility
3091 */
3092
3093void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003094xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003095 static int deprecated = 0;
3096 if (!deprecated) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "xmlParseNamespace() deprecated function reached\n");
3099 deprecated = 1;
3100 }
3101
3102#if 0
3103 xmlChar *href = NULL;
3104 xmlChar *prefix = NULL;
3105 int garbage = 0;
3106
3107 /*
3108 * We just skipped "namespace" or "xml:namespace"
3109 */
3110 SKIP_BLANKS;
3111
3112xmlGenericError(xmlGenericErrorContext,
3113 "xmlParseNamespace: reached loop 5\n");
3114 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3115 /*
3116 * We can have "ns" or "prefix" attributes
3117 * Old encoding as 'href' or 'AS' attributes is still supported
3118 */
3119 if ((RAW == 'n') && (NXT(1) == 's')) {
3120 garbage = 0;
3121 SKIP(2);
3122 SKIP_BLANKS;
3123
3124 if (RAW != '=') continue;
3125 NEXT;
3126 SKIP_BLANKS;
3127
3128 href = xmlParseQuotedString(ctxt);
3129 SKIP_BLANKS;
3130 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3131 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3132 garbage = 0;
3133 SKIP(4);
3134 SKIP_BLANKS;
3135
3136 if (RAW != '=') continue;
3137 NEXT;
3138 SKIP_BLANKS;
3139
3140 href = xmlParseQuotedString(ctxt);
3141 SKIP_BLANKS;
3142 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3143 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3144 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3145 garbage = 0;
3146 SKIP(6);
3147 SKIP_BLANKS;
3148
3149 if (RAW != '=') continue;
3150 NEXT;
3151 SKIP_BLANKS;
3152
3153 prefix = xmlParseQuotedString(ctxt);
3154 SKIP_BLANKS;
3155 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3156 garbage = 0;
3157 SKIP(2);
3158 SKIP_BLANKS;
3159
3160 if (RAW != '=') continue;
3161 NEXT;
3162 SKIP_BLANKS;
3163
3164 prefix = xmlParseQuotedString(ctxt);
3165 SKIP_BLANKS;
3166 } else if ((RAW == '?') && (NXT(1) == '>')) {
3167 garbage = 0;
3168 NEXT;
3169 } else {
3170 /*
3171 * Found garbage when parsing the namespace
3172 */
3173 if (!garbage) {
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData,
3176 "xmlParseNamespace found garbage\n");
3177 }
3178 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3179 ctxt->wellFormed = 0;
3180 ctxt->disableSAX = 1;
3181 NEXT;
3182 }
3183 }
3184
3185 MOVETO_ENDTAG(CUR_PTR);
3186 NEXT;
3187
3188 /*
3189 * Register the DTD.
3190 if (href != NULL)
3191 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3192 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3193 */
3194
3195 if (prefix != NULL) xmlFree(prefix);
3196 if (href != NULL) xmlFree(href);
3197#endif
3198}
3199
3200/**
3201 * xmlScanName:
3202 * @ctxt: an XML parser context
3203 *
3204 * Trickery: parse an XML name but without consuming the input flow
3205 * Needed for rollback cases. Used only when parsing entities references.
3206 *
3207 * TODO: seems deprecated now, only used in the default part of
3208 * xmlParserHandleReference
3209 *
3210 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3211 * CombiningChar | Extender
3212 *
3213 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3214 *
3215 * [6] Names ::= Name (S Name)*
3216 *
3217 * Returns the Name parsed or NULL
3218 */
3219
3220xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003221xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003222 static int deprecated = 0;
3223 if (!deprecated) {
3224 xmlGenericError(xmlGenericErrorContext,
3225 "xmlScanName() deprecated function reached\n");
3226 deprecated = 1;
3227 }
3228 return(NULL);
3229
3230#if 0
3231 xmlChar buf[XML_MAX_NAMELEN];
3232 int len = 0;
3233
3234 GROW;
3235 if (!IS_LETTER(RAW) && (RAW != '_') &&
3236 (RAW != ':')) {
3237 return(NULL);
3238 }
3239
3240
3241 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3242 (NXT(len) == '.') || (NXT(len) == '-') ||
3243 (NXT(len) == '_') || (NXT(len) == ':') ||
3244 (IS_COMBINING(NXT(len))) ||
3245 (IS_EXTENDER(NXT(len)))) {
3246 GROW;
3247 buf[len] = NXT(len);
3248 len++;
3249 if (len >= XML_MAX_NAMELEN) {
3250 xmlGenericError(xmlGenericErrorContext,
3251 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3252 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3253 (IS_DIGIT(NXT(len))) ||
3254 (NXT(len) == '.') || (NXT(len) == '-') ||
3255 (NXT(len) == '_') || (NXT(len) == ':') ||
3256 (IS_COMBINING(NXT(len))) ||
3257 (IS_EXTENDER(NXT(len))))
3258 len++;
3259 break;
3260 }
3261 }
3262 return(xmlStrndup(buf, len));
3263#endif
3264}
3265
3266/**
3267 * xmlParserHandleReference:
3268 * @ctxt: the parser context
3269 *
3270 * TODO: Remove, now deprecated ... the test is done directly in the
3271 * content parsing
3272 * routines.
3273 *
3274 * [67] Reference ::= EntityRef | CharRef
3275 *
3276 * [68] EntityRef ::= '&' Name ';'
3277 *
3278 * [ WFC: Entity Declared ]
3279 * the Name given in the entity reference must match that in an entity
3280 * declaration, except that well-formed documents need not declare any
3281 * of the following entities: amp, lt, gt, apos, quot.
3282 *
3283 * [ WFC: Parsed Entity ]
3284 * An entity reference must not contain the name of an unparsed entity
3285 *
3286 * [66] CharRef ::= '&#' [0-9]+ ';' |
3287 * '&#x' [0-9a-fA-F]+ ';'
3288 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003289 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003290 * the handling is done accordingly to
3291 * http://www.w3.org/TR/REC-xml#entproc
3292 */
3293void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003294xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003295 static int deprecated = 0;
3296 if (!deprecated) {
3297 xmlGenericError(xmlGenericErrorContext,
3298 "xmlParserHandleReference() deprecated function reached\n");
3299 deprecated = 1;
3300 }
3301
3302#if 0
3303 xmlParserInputPtr input;
3304 xmlChar *name;
3305 xmlEntityPtr ent = NULL;
3306
3307 if (ctxt->token != 0) {
3308 return;
3309 }
3310 if (RAW != '&') return;
3311 GROW;
3312 if ((RAW == '&') && (NXT(1) == '#')) {
3313 switch(ctxt->instate) {
3314 case XML_PARSER_ENTITY_DECL:
3315 case XML_PARSER_PI:
3316 case XML_PARSER_CDATA_SECTION:
3317 case XML_PARSER_COMMENT:
3318 case XML_PARSER_SYSTEM_LITERAL:
3319 /* we just ignore it there */
3320 return;
3321 case XML_PARSER_START_TAG:
3322 return;
3323 case XML_PARSER_END_TAG:
3324 return;
3325 case XML_PARSER_EOF:
3326 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3328 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3329 ctxt->wellFormed = 0;
3330 ctxt->disableSAX = 1;
3331 return;
3332 case XML_PARSER_PROLOG:
3333 case XML_PARSER_START:
3334 case XML_PARSER_MISC:
3335 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3338 ctxt->wellFormed = 0;
3339 ctxt->disableSAX = 1;
3340 return;
3341 case XML_PARSER_EPILOG:
3342 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 return;
3348 case XML_PARSER_DTD:
3349 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003352 "CharRef are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003353 ctxt->wellFormed = 0;
3354 ctxt->disableSAX = 1;
3355 return;
3356 case XML_PARSER_ENTITY_VALUE:
3357 /*
3358 * NOTE: in the case of entity values, we don't do the
3359 * substitution here since we need the literal
3360 * entity value to be able to save the internal
3361 * subset of the document.
3362 * This will be handled by xmlStringDecodeEntities
3363 */
3364 return;
3365 case XML_PARSER_CONTENT:
3366 return;
3367 case XML_PARSER_ATTRIBUTE_VALUE:
3368 /* ctxt->token = xmlParseCharRef(ctxt); */
3369 return;
3370 case XML_PARSER_IGNORE:
3371 return;
3372 }
3373 return;
3374 }
3375
3376 switch(ctxt->instate) {
3377 case XML_PARSER_CDATA_SECTION:
3378 return;
3379 case XML_PARSER_PI:
3380 case XML_PARSER_COMMENT:
3381 case XML_PARSER_SYSTEM_LITERAL:
3382 case XML_PARSER_CONTENT:
3383 return;
3384 case XML_PARSER_START_TAG:
3385 return;
3386 case XML_PARSER_END_TAG:
3387 return;
3388 case XML_PARSER_EOF:
3389 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3391 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3392 ctxt->wellFormed = 0;
3393 ctxt->disableSAX = 1;
3394 return;
3395 case XML_PARSER_PROLOG:
3396 case XML_PARSER_START:
3397 case XML_PARSER_MISC:
3398 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3400 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3401 ctxt->wellFormed = 0;
3402 ctxt->disableSAX = 1;
3403 return;
3404 case XML_PARSER_EPILOG:
3405 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3407 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3408 ctxt->wellFormed = 0;
3409 ctxt->disableSAX = 1;
3410 return;
3411 case XML_PARSER_ENTITY_VALUE:
3412 /*
3413 * NOTE: in the case of entity values, we don't do the
3414 * substitution here since we need the literal
3415 * entity value to be able to save the internal
3416 * subset of the document.
3417 * This will be handled by xmlStringDecodeEntities
3418 */
3419 return;
3420 case XML_PARSER_ATTRIBUTE_VALUE:
3421 /*
3422 * NOTE: in the case of attributes values, we don't do the
3423 * substitution here unless we are in a mode where
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003424 * the parser is explicitly asked to substitute
Owen Taylor3473f882001-02-23 17:55:21 +00003425 * entities. The SAX callback is called with values
3426 * without entity substitution.
3427 * This will then be handled by xmlStringDecodeEntities
3428 */
3429 return;
3430 case XML_PARSER_ENTITY_DECL:
3431 /*
3432 * we just ignore it there
3433 * the substitution will be done once the entity is referenced
3434 */
3435 return;
3436 case XML_PARSER_DTD:
3437 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3439 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003440 "Entity references are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->wellFormed = 0;
3442 ctxt->disableSAX = 1;
3443 return;
3444 case XML_PARSER_IGNORE:
3445 return;
3446 }
3447
3448/* TODO: this seems not reached anymore .... Verify ... */
3449xmlGenericError(xmlGenericErrorContext,
3450 "Reached deprecated section in xmlParserHandleReference()\n");
3451xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003452 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003453xmlGenericError(xmlGenericErrorContext,
3454 "indicating the version: %s, thanks !\n", xmlParserVersion);
3455 NEXT;
3456 name = xmlScanName(ctxt);
3457 if (name == NULL) {
3458 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3460 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3461 ctxt->wellFormed = 0;
3462 ctxt->disableSAX = 1;
3463 ctxt->token = '&';
3464 return;
3465 }
3466 if (NXT(xmlStrlen(name)) != ';') {
3467 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3469 ctxt->sax->error(ctxt->userData,
3470 "Entity reference: ';' expected\n");
3471 ctxt->wellFormed = 0;
3472 ctxt->disableSAX = 1;
3473 ctxt->token = '&';
3474 xmlFree(name);
3475 return;
3476 }
3477 SKIP(xmlStrlen(name) + 1);
3478 if (ctxt->sax != NULL) {
3479 if (ctxt->sax->getEntity != NULL)
3480 ent = ctxt->sax->getEntity(ctxt->userData, name);
3481 }
3482
3483 /*
3484 * [ WFC: Entity Declared ]
3485 * the Name given in the entity reference must match that in an entity
3486 * declaration, except that well-formed documents need not declare any
3487 * of the following entities: amp, lt, gt, apos, quot.
3488 */
3489 if (ent == NULL)
3490 ent = xmlGetPredefinedEntity(name);
3491 if (ent == NULL) {
3492 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Entity reference: entity %s not declared\n",
3496 name);
3497 ctxt->wellFormed = 0;
3498 ctxt->disableSAX = 1;
3499 xmlFree(name);
3500 return;
3501 }
3502
3503 /*
3504 * [ WFC: Parsed Entity ]
3505 * An entity reference must not contain the name of an unparsed entity
3506 */
3507 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3508 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "Entity reference to unparsed entity %s\n", name);
3512 ctxt->wellFormed = 0;
3513 ctxt->disableSAX = 1;
3514 }
3515
3516 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3517 ctxt->token = ent->content[0];
3518 xmlFree(name);
3519 return;
3520 }
3521 input = xmlNewEntityInputStream(ctxt, ent);
3522 xmlPushInput(ctxt, input);
3523 xmlFree(name);
3524#endif
3525 return;
3526}
3527
3528/**
3529 * xmlHandleEntity:
3530 * @ctxt: an XML parser context
3531 * @entity: an XML entity pointer.
3532 *
3533 * Default handling of defined entities, when should we define a new input
3534 * stream ? When do we just handle that as a set of chars ?
3535 *
3536 * OBSOLETE: to be removed at some point.
3537 */
3538
3539void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003540xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003541 static int deprecated = 0;
3542 if (!deprecated) {
3543 xmlGenericError(xmlGenericErrorContext,
3544 "xmlHandleEntity() deprecated function reached\n");
3545 deprecated = 1;
3546 }
3547
3548#if 0
3549 int len;
3550 xmlParserInputPtr input;
3551
3552 if (entity->content == NULL) {
3553 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3555 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3556 entity->name);
3557 ctxt->wellFormed = 0;
3558 ctxt->disableSAX = 1;
3559 return;
3560 }
3561 len = xmlStrlen(entity->content);
3562 if (len <= 2) goto handle_as_char;
3563
3564 /*
3565 * Redefine its content as an input stream.
3566 */
3567 input = xmlNewEntityInputStream(ctxt, entity);
3568 xmlPushInput(ctxt, input);
3569 return;
3570
3571handle_as_char:
3572 /*
3573 * Just handle the content as a set of chars.
3574 */
3575 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3576 (ctxt->sax->characters != NULL))
3577 ctxt->sax->characters(ctxt->userData, entity->content, len);
3578#endif
3579}
3580
3581/**
3582 * xmlNewGlobalNs:
3583 * @doc: the document carrying the namespace
3584 * @href: the URI associated
3585 * @prefix: the prefix for the namespace
3586 *
3587 * Creation of a Namespace, the old way using PI and without scoping
3588 * DEPRECATED !!!
3589 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003590 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003591 */
3592xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003593xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3594 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003595 static int deprecated = 0;
3596 if (!deprecated) {
3597 xmlGenericError(xmlGenericErrorContext,
3598 "xmlNewGlobalNs() deprecated function reached\n");
3599 deprecated = 1;
3600 }
3601 return(NULL);
3602#if 0
3603 xmlNodePtr root;
3604
3605 xmlNsPtr cur;
3606
3607 root = xmlDocGetRootElement(doc);
3608 if (root != NULL)
3609 return(xmlNewNs(root, href, prefix));
3610
3611 /*
3612 * if there is no root element yet, create an old Namespace type
3613 * and it will be moved to the root at save time.
3614 */
3615 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3616 if (cur == NULL) {
3617 xmlGenericError(xmlGenericErrorContext,
3618 "xmlNewGlobalNs : malloc failed\n");
3619 return(NULL);
3620 }
3621 memset(cur, 0, sizeof(xmlNs));
3622 cur->type = XML_GLOBAL_NAMESPACE;
3623
3624 if (href != NULL)
3625 cur->href = xmlStrdup(href);
3626 if (prefix != NULL)
3627 cur->prefix = xmlStrdup(prefix);
3628
3629 /*
3630 * Add it at the end to preserve parsing order ...
3631 */
3632 if (doc != NULL) {
3633 if (doc->oldNs == NULL) {
3634 doc->oldNs = cur;
3635 } else {
3636 xmlNsPtr prev = doc->oldNs;
3637
3638 while (prev->next != NULL) prev = prev->next;
3639 prev->next = cur;
3640 }
3641 }
3642
3643 return(NULL);
3644#endif
3645}
3646
3647/**
3648 * xmlUpgradeOldNs:
3649 * @doc: a document pointer
3650 *
3651 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3652 * DEPRECATED
3653 */
3654void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003655xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003656 static int deprecated = 0;
3657 if (!deprecated) {
3658 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003659 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003660 deprecated = 1;
3661 }
3662#if 0
3663 xmlNsPtr cur;
3664
3665 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3666 if (doc->children == NULL) {
3667#ifdef DEBUG_TREE
3668 xmlGenericError(xmlGenericErrorContext,
3669 "xmlUpgradeOldNs: failed no root !\n");
3670#endif
3671 return;
3672 }
3673
3674 cur = doc->oldNs;
3675 while (cur->next != NULL) {
3676 cur->type = XML_LOCAL_NAMESPACE;
3677 cur = cur->next;
3678 }
3679 cur->type = XML_LOCAL_NAMESPACE;
3680 cur->next = doc->children->nsDef;
3681 doc->children->nsDef = doc->oldNs;
3682 doc->oldNs = NULL;
3683#endif
3684}
3685