blob: 031f3c1d61b6ecdee89b35a0c8ac186275d6fa95 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
Owen Taylor3473f882001-02-23 17:55:21 +00004 *
5 * See Copyright for the status of this software.
6 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00007 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00008 */
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
11
Daniel Veillard3c5ed912002-01-08 10:36:16 +000012#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000013#define XML_DIR_SEP '\\'
14#else
Owen Taylor3473f882001-02-23 17:55:21 +000015#define XML_DIR_SEP '/'
16#endif
17
Owen Taylor3473f882001-02-23 17:55:21 +000018#include <string.h>
19#ifdef HAVE_CTYPE_H
20#include <ctype.h>
21#endif
22#ifdef HAVE_STDLIB_H
23#include <stdlib.h>
24#endif
25#ifdef HAVE_SYS_STAT_H
26#include <sys/stat.h>
27#endif
28#ifdef HAVE_FCNTL_H
29#include <fcntl.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
38#include <libxml/xmlmemory.h>
39#include <libxml/tree.h>
40#include <libxml/parser.h>
41#include <libxml/parserInternals.h>
42#include <libxml/valid.h>
43#include <libxml/entities.h>
44#include <libxml/xmlerror.h>
45#include <libxml/encoding.h>
46#include <libxml/valid.h>
47#include <libxml/xmlIO.h>
48#include <libxml/uri.h>
Daniel Veillard16698282001-09-14 10:29:27 +000049#include <libxml/SAX.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000050#ifdef LIBXML_CATALOG_ENABLED
51#include <libxml/catalog.h>
52#endif
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000053#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000054
Daniel Veillard56a4cb82001-03-24 17:00:36 +000055void xmlUpgradeOldNs(xmlDocPtr doc);
Owen Taylor3473f882001-02-23 17:55:21 +000056
Daniel Veillarda53c6882001-07-25 17:18:57 +000057/*
 * Various global defaults for parsing.
 *
 * Only when VMS is defined does this file itself define the two default
 * flags, under the shortened names xmlSubstituteEntitiesDefaultVal and
 * xmlDoValidityCheckingDefaultVal (both initialised to 0); the usual
 * ...DefaultValue spellings are then #define'd onto those shortened names.
 *
 * NOTE(review): presumably this works around a symbol-length limit on VMS,
 * and other platforms get these globals from another file - confirm.
 */
Daniel Veillarda53c6882001-07-25 17:18:57 +000060#ifdef VMS
61int xmlSubstituteEntitiesDefaultVal = 0;
62#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
63int xmlDoValidityCheckingDefaultVal = 0;
64#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
Daniel Veillarda53c6882001-07-25 17:18:57 +000065#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066
Daniel Veillard5e2dace2001-07-18 19:30:27 +000067/**
Owen Taylor3473f882001-02-23 17:55:21 +000068 * xmlCheckVersion:
69 * @version: the include version number
70 *
71 * check the compiled lib version against the include one.
72 * This can warn or immediately kill the application
73 */
74void
75xmlCheckVersion(int version) {
76 int myversion = (int) LIBXML_VERSION;
77
Daniel Veillard6f350292001-10-14 09:56:15 +000078 xmlInitParser();
Daniel Veillard4de4d3b2001-05-07 20:50:47 +000079
Owen Taylor3473f882001-02-23 17:55:21 +000080 if ((myversion / 10000) != (version / 10000)) {
81 xmlGenericError(xmlGenericErrorContext,
82 "Fatal: program compiled against libxml %d using libxml %d\n",
83 (version / 10000), (myversion / 10000));
Daniel Veillardc69e0b12001-11-20 08:35:07 +000084 fprintf(stderr,
85 "Fatal: program compiled against libxml %d using libxml %d\n",
86 (version / 10000), (myversion / 10000));
Owen Taylor3473f882001-02-23 17:55:21 +000087 }
88 if ((myversion / 100) < (version / 100)) {
89 xmlGenericError(xmlGenericErrorContext,
90 "Warning: program compiled against libxml %d using older %d\n",
91 (version / 100), (myversion / 100));
92 }
93}
94
95
/*
 * Names of the features known to the feature API, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
static const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned.
 *
 * Returns -1 in case of error (*@len negative or >= 1000), or the total
 *            number of features,
 *            len is updated with the number of strings copied,
 *            strings must not be deallocated
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len != NULL) && (result != NULL)) {
        wanted = *len;
        if ((wanted < 0) || (wanted >= 1000))
            return(-1);

        /* never hand out more names than exist; tell the caller so */
        if (wanted > total) {
            wanted = total;
            *len = total;
        }

        idx = 0;
        while (idx < wanted) {
            result[idx] = xmlFeaturesList[idx];
            idx++;
        }
    }
    return(total);
}
167
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000168/**
Owen Taylor3473f882001-02-23 17:55:21 +0000169 * xmlGetFeature:
170 * @ctxt: an XML/HTML parser context
171 * @name: the feature name
172 * @result: location to store the result
173 *
174 * Read the current value of one feature of this parser instance
175 *
176 * Returns -1 in case or error, 0 otherwise
177 */
178int
179xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181 return(-1);
182
183 if (!strcmp(name, "validate")) {
184 *((int *) result) = ctxt->validate;
185 } else if (!strcmp(name, "keep blanks")) {
186 *((int *) result) = ctxt->keepBlanks;
187 } else if (!strcmp(name, "disable SAX")) {
188 *((int *) result) = ctxt->disableSAX;
189 } else if (!strcmp(name, "fetch external entities")) {
190 *((int *) result) = ctxt->loadsubset;
191 } else if (!strcmp(name, "substitute entities")) {
192 *((int *) result) = ctxt->replaceEntities;
193 } else if (!strcmp(name, "gather line info")) {
194 *((int *) result) = ctxt->record_info;
195 } else if (!strcmp(name, "user data")) {
196 *((void **)result) = ctxt->userData;
197 } else if (!strcmp(name, "is html")) {
198 *((int *) result) = ctxt->html;
199 } else if (!strcmp(name, "is standalone")) {
200 *((int *) result) = ctxt->standalone;
201 } else if (!strcmp(name, "document")) {
202 *((xmlDocPtr *) result) = ctxt->myDoc;
203 } else if (!strcmp(name, "is well formed")) {
204 *((int *) result) = ctxt->wellFormed;
205 } else if (!strcmp(name, "is valid")) {
206 *((int *) result) = ctxt->valid;
207 } else if (!strcmp(name, "SAX block")) {
208 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209 } else if (!strcmp(name, "SAX function internalSubset")) {
210 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211 } else if (!strcmp(name, "SAX function isStandalone")) {
212 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217 } else if (!strcmp(name, "SAX function resolveEntity")) {
218 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219 } else if (!strcmp(name, "SAX function getEntity")) {
220 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221 } else if (!strcmp(name, "SAX function entityDecl")) {
222 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223 } else if (!strcmp(name, "SAX function notationDecl")) {
224 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225 } else if (!strcmp(name, "SAX function attributeDecl")) {
226 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227 } else if (!strcmp(name, "SAX function elementDecl")) {
228 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233 } else if (!strcmp(name, "SAX function startDocument")) {
234 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235 } else if (!strcmp(name, "SAX function endDocument")) {
236 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237 } else if (!strcmp(name, "SAX function startElement")) {
238 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239 } else if (!strcmp(name, "SAX function endElement")) {
240 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241 } else if (!strcmp(name, "SAX function reference")) {
242 *((referenceSAXFunc *) result) = ctxt->sax->reference;
243 } else if (!strcmp(name, "SAX function characters")) {
244 *((charactersSAXFunc *) result) = ctxt->sax->characters;
245 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247 } else if (!strcmp(name, "SAX function processingInstruction")) {
248 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249 } else if (!strcmp(name, "SAX function comment")) {
250 *((commentSAXFunc *) result) = ctxt->sax->comment;
251 } else if (!strcmp(name, "SAX function warning")) {
252 *((warningSAXFunc *) result) = ctxt->sax->warning;
253 } else if (!strcmp(name, "SAX function error")) {
254 *((errorSAXFunc *) result) = ctxt->sax->error;
255 } else if (!strcmp(name, "SAX function fatalError")) {
256 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257 } else if (!strcmp(name, "SAX function getParameterEntity")) {
258 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259 } else if (!strcmp(name, "SAX function cdataBlock")) {
260 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261 } else if (!strcmp(name, "SAX function externalSubset")) {
262 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263 } else {
264 return(-1);
265 }
266 return(0);
267}
268
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000269/**
Owen Taylor3473f882001-02-23 17:55:21 +0000270 * xmlSetFeature:
271 * @ctxt: an XML/HTML parser context
272 * @name: the feature name
273 * @value: pointer to the location of the new value
274 *
275 * Change the current value of one feature of this parser instance
276 *
277 * Returns -1 in case or error, 0 otherwise
278 */
279int
280xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282 return(-1);
283
284 if (!strcmp(name, "validate")) {
285 int newvalidate = *((int *) value);
286 if ((!ctxt->validate) && (newvalidate != 0)) {
287 if (ctxt->vctxt.warning == NULL)
288 ctxt->vctxt.warning = xmlParserValidityWarning;
289 if (ctxt->vctxt.error == NULL)
290 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +0000291 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000292 }
293 ctxt->validate = newvalidate;
294 } else if (!strcmp(name, "keep blanks")) {
295 ctxt->keepBlanks = *((int *) value);
296 } else if (!strcmp(name, "disable SAX")) {
297 ctxt->disableSAX = *((int *) value);
298 } else if (!strcmp(name, "fetch external entities")) {
299 ctxt->loadsubset = *((int *) value);
300 } else if (!strcmp(name, "substitute entities")) {
301 ctxt->replaceEntities = *((int *) value);
302 } else if (!strcmp(name, "gather line info")) {
303 ctxt->record_info = *((int *) value);
304 } else if (!strcmp(name, "user data")) {
305 ctxt->userData = *((void **)value);
306 } else if (!strcmp(name, "is html")) {
307 ctxt->html = *((int *) value);
308 } else if (!strcmp(name, "is standalone")) {
309 ctxt->standalone = *((int *) value);
310 } else if (!strcmp(name, "document")) {
311 ctxt->myDoc = *((xmlDocPtr *) value);
312 } else if (!strcmp(name, "is well formed")) {
313 ctxt->wellFormed = *((int *) value);
314 } else if (!strcmp(name, "is valid")) {
315 ctxt->valid = *((int *) value);
316 } else if (!strcmp(name, "SAX block")) {
317 ctxt->sax = *((xmlSAXHandlerPtr *) value);
318 } else if (!strcmp(name, "SAX function internalSubset")) {
319 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function isStandalone")) {
321 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
323 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
325 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
326 } else if (!strcmp(name, "SAX function resolveEntity")) {
327 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function getEntity")) {
329 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
330 } else if (!strcmp(name, "SAX function entityDecl")) {
331 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function notationDecl")) {
333 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function attributeDecl")) {
335 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function elementDecl")) {
337 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
339 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
341 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function startDocument")) {
343 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function endDocument")) {
345 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function startElement")) {
347 ctxt->sax->startElement = *((startElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function endElement")) {
349 ctxt->sax->endElement = *((endElementSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function reference")) {
351 ctxt->sax->reference = *((referenceSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function characters")) {
353 ctxt->sax->characters = *((charactersSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
355 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function processingInstruction")) {
357 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function comment")) {
359 ctxt->sax->comment = *((commentSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function warning")) {
361 ctxt->sax->warning = *((warningSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function error")) {
363 ctxt->sax->error = *((errorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function fatalError")) {
365 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
366 } else if (!strcmp(name, "SAX function getParameterEntity")) {
367 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
368 } else if (!strcmp(name, "SAX function cdataBlock")) {
369 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
370 } else if (!strcmp(name, "SAX function externalSubset")) {
371 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
372 } else {
373 return(-1);
374 }
375 return(0);
376}
377
378/************************************************************************
379 * *
380 * Some functions to avoid too large macros *
381 * *
382 ************************************************************************/
383
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space character only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate block */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through to the supplementary-plane test */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
404
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
419
/*
 * Direct lookup table for the code points below 0x0100:
 * xmlBaseArray[c] is 1 when c is a BaseChar, 0 otherwise.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive { first, last } BaseChar ranges at or above 0x0100.
 * The entries MUST stay sorted in ascending order and must not overlap:
 * xmlIsBaseChar() runs a binary search over this table.
 */
static const int xmlBaseCharRanges[][2] = {
    {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
    {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
    {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
    {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
    {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
    {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
    {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
    {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
    {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
    {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
    {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
    {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
    {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
    {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
    {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
    {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
    {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
    {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
    {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
    {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
    {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
    {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
    {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
    {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
    {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
    {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
    {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
    {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
    {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
    {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
    {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
    {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
    {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E},
    {0x1140, 0x1140}, {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150},
    {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115F, 0x1161}, {0x1163, 0x1163},
    {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116D, 0x116E},
    {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
    {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA},
    {0x11BC, 0x11C2}, {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9},
    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
    {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
    {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
    {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4},
    {0x1FF6, 0x1FFC}, {0x2126, 0x2126}, {0x212A, 0x212B}, {0x212E, 0x212E},
    {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C},
    {0xAC00, 0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, everything
 * else by a binary search in xmlBaseCharRanges. Negative values are
 * rejected up front so that they can never index xmlBaseArray.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
658
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The inclusive ranges of the production are kept in an ascending table
 * which is scanned until a range contains @c or starts above it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive { first, last } pairs, sorted in ascending order */
    static const int digitRanges[][2] = {
        {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9},
        {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
        {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
        {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    const int nbRanges =
        (int) (sizeof(digitRanges) / sizeof(digitRanges[0]));
    int idx;

    for (idx = 0; idx < nbRanges; idx++) {
        /* the table is sorted: nothing further on can match */
        if (c < digitRanges[idx][0])
            break;
        if (c <= digitRanges[idx][1])
            return(1);
    }
    return(0);
}
688
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The inclusive ranges of the production are kept in an ascending,
 * non-overlapping table which is searched by bisection.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive { first, last } pairs, sorted in ascending order */
    static const int combiningRanges[][2] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
        {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
        {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3}, {0x0A02, 0x0A02},
        {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F}, {0x0A40, 0x0A42},
        {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
        {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
        {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
        {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
        {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0C01, 0x0C03},
        {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
        {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
        {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48},
        {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A},
        {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
        {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
        {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84},
        {0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD},
        {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1},
        {0x302A, 0x302F}, {0x3099, 0x3099}, {0x309A, 0x309A}
    };
    int first = 0;
    int last = (int) (sizeof(combiningRanges) /
                      sizeof(combiningRanges[0])) - 1;

    while (first <= last) {
        int probe = first + (last - first) / 2;

        if (c < combiningRanges[probe][0])
            last = probe - 1;
        else if (c > combiningRanges[probe][1])
            first = probe + 1;
        else
            return(1);
    }
    return(0);
}
801
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Every code point of the three ranges is listed explicitly, including
 * #x30FD which sits in the middle of [#x30FC-#x30FE].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE] */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
826
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Note that this implementation additionally accepts the range
 * [#xF900-#xFA2D], which is not part of the production quoted above.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x100 can match, get rid of those quickly */
    if (c < 0x0100)
        return(0);

    if (c == 0x3007)
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    if ((c >= 0x4e00) && (c <= 0x9fa5))
        return(1);
    if ((c >= 0xf900) && (c <= 0xfa2d))
        return(1);

    return(0);
}
844
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The BaseChar test is tried first, the Ideographic one only if it fails.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
858
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
    /* #x20 | #xD | #xA */
    case 0x20: case 0x0D: case 0x0A:
    /* [-'()+,./:=?;!*#@$_%] */
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
881
882/************************************************************************
883 * *
884 * Input handling functions for progressive parsing *
885 * *
886 ************************************************************************/
887
888/* #define DEBUG_INPUT */
889/* #define DEBUG_STACK */
890/* #define DEBUG_PUSH */
891
892
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the cached pointers of @in
 * disagree with its underlying buffer (base not matching the buffer
 * content, or cur outside of [base, base + use]), then dumps the
 * addresses and counters of the buffer.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates the
     * address on platforms where pointers are wider than int. The
     * remaining values are explicitly converted to int to match %d.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
920
921
922/**
923 * xmlParserInputRead:
924 * @in: an XML parser input
925 * @len: an indicative size for the lookahead
926 *
927 * This function refresh the input for the parser. It doesn't try to
928 * preserve pointers to the input buffer, and discard already read data
929 *
930 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
931 * end of this entity
932 */
933int
934xmlParserInputRead(xmlParserInputPtr in, int len) {
935 int ret;
936 int used;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000937 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000938
939#ifdef DEBUG_INPUT
940 xmlGenericError(xmlGenericErrorContext, "Read\n");
941#endif
942 if (in->buf == NULL) return(-1);
943 if (in->base == NULL) return(-1);
944 if (in->cur == NULL) return(-1);
945 if (in->buf->buffer == NULL) return(-1);
946 if (in->buf->readcallback == NULL) return(-1);
947
948 CHECK_BUFFER(in);
949
950 used = in->cur - in->buf->buffer->content;
951 ret = xmlBufferShrink(in->buf->buffer, used);
952 if (ret > 0) {
953 in->cur -= ret;
954 in->consumed += ret;
955 }
956 ret = xmlParserInputBufferRead(in->buf, len);
957 if (in->base != in->buf->buffer->content) {
958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000959 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +0000960 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000961 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +0000962 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000963 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000964 }
Daniel Veillard48b2f892001-02-25 16:11:03 +0000965 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +0000966
967 CHECK_BUFFER(in);
968
969 return(ret);
970}
971
972/**
973 * xmlParserInputGrow:
974 * @in: an XML parser input
975 * @len: an indicative size for the lookahead
976 *
977 * This function increase the input for the parser. It tries to
978 * preserve pointers to the input buffer, and keep already read data
979 *
980 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
981 * end of this entity
982 */
983int
984xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000986 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +0000987
988#ifdef DEBUG_INPUT
989 xmlGenericError(xmlGenericErrorContext, "Grow\n");
990#endif
991 if (in->buf == NULL) return(-1);
992 if (in->base == NULL) return(-1);
993 if (in->cur == NULL) return(-1);
994 if (in->buf->buffer == NULL) return(-1);
995
996 CHECK_BUFFER(in);
997
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000998 indx = in->cur - in->base;
999 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
Owen Taylor3473f882001-02-23 17:55:21 +00001000
1001 CHECK_BUFFER(in);
1002
1003 return(0);
1004 }
1005 if (in->buf->readcallback != NULL)
1006 ret = xmlParserInputBufferGrow(in->buf, len);
1007 else
1008 return(0);
1009
1010 /*
Daniel Veillard48b2f892001-02-25 16:11:03 +00001011 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
Owen Taylor3473f882001-02-23 17:55:21 +00001012 * block, but we use it really as an integer to do some
1013 * pointer arithmetic. Insure will raise it as a bug but in
1014 * that specific case, that's not !
1015 */
1016 if (in->base != in->buf->buffer->content) {
1017 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001018 * the buffer has been reallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001019 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001020 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001021 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001022 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001023 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001024 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001025
1026 CHECK_BUFFER(in);
1027
1028 return(ret);
1029}
1030
1031/**
1032 * xmlParserInputShrink:
1033 * @in: an XML parser input
1034 *
1035 * This function removes used input for the parser.
1036 */
1037void
1038xmlParserInputShrink(xmlParserInputPtr in) {
1039 int used;
1040 int ret;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001041 int indx;
Owen Taylor3473f882001-02-23 17:55:21 +00001042
1043#ifdef DEBUG_INPUT
1044 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1045#endif
1046 if (in->buf == NULL) return;
1047 if (in->base == NULL) return;
1048 if (in->cur == NULL) return;
1049 if (in->buf->buffer == NULL) return;
1050
1051 CHECK_BUFFER(in);
1052
1053 used = in->cur - in->buf->buffer->content;
1054 /*
1055 * Do not shrink on large buffers whose only a tiny fraction
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001056 * was consumed
Owen Taylor3473f882001-02-23 17:55:21 +00001057 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001058 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
Owen Taylor3473f882001-02-23 17:55:21 +00001059 return;
1060 if (used > INPUT_CHUNK) {
1061 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1062 if (ret > 0) {
1063 in->cur -= ret;
1064 in->consumed += ret;
1065 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001066 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001067 }
1068
1069 CHECK_BUFFER(in);
1070
1071 if (in->buf->buffer->use > INPUT_CHUNK) {
1072 return;
1073 }
1074 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1075 if (in->base != in->buf->buffer->content) {
1076 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001077 * the buffer has been ereallocated
Owen Taylor3473f882001-02-23 17:55:21 +00001078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001079 indx = in->cur - in->base;
Owen Taylor3473f882001-02-23 17:55:21 +00001080 in->base = in->buf->buffer->content;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081 in->cur = &in->buf->buffer->content[indx];
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001083 in->end = &in->buf->buffer->content[in->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001084
1085 CHECK_BUFFER(in);
1086}
1087
1088/************************************************************************
1089 * *
1090 * UTF8 character input and related functions *
1091 * *
1092 ************************************************************************/
1093
1094/**
1095 * xmlNextChar:
1096 * @ctxt: the XML parser context
1097 *
1098 * Skip to the next char input char.
1099 */
1100
1101void
1102xmlNextChar(xmlParserCtxtPtr ctxt) {
1103 if (ctxt->instate == XML_PARSER_EOF)
1104 return;
1105
1106 /*
1107 * 2.11 End-of-Line Handling
1108 * the literal two-character sequence "#xD#xA" or a standalone
1109 * literal #xD, an XML processor must pass to the application
1110 * the single character #xA.
1111 */
1112 if (ctxt->token != 0) ctxt->token = 0;
1113 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1114 if ((*ctxt->input->cur == 0) &&
1115 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1116 (ctxt->instate != XML_PARSER_COMMENT)) {
1117 /*
1118 * If we are at the end of the current entity and
1119 * the context allows it, we pop consumed entities
1120 * automatically.
1121 * the auto closing should be blocked in other cases
1122 */
1123 xmlPopInput(ctxt);
1124 } else {
1125 if (*(ctxt->input->cur) == '\n') {
1126 ctxt->input->line++; ctxt->input->col = 1;
1127 } else ctxt->input->col++;
1128 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1129 /*
1130 * We are supposed to handle UTF8, check it's valid
1131 * From rfc2044: encoding of the Unicode values on UTF-8:
1132 *
1133 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1134 * 0000 0000-0000 007F 0xxxxxxx
1135 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1136 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1137 *
1138 * Check for the 0x110000 limit too
1139 */
1140 const unsigned char *cur = ctxt->input->cur;
1141 unsigned char c;
1142
1143 c = *cur;
1144 if (c & 0x80) {
1145 if (cur[1] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[1] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xe0) == 0xe0) {
1150 unsigned int val;
1151
1152 if (cur[2] == 0)
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if ((cur[2] & 0xc0) != 0x80)
1155 goto encoding_error;
1156 if ((c & 0xf0) == 0xf0) {
1157 if (cur[3] == 0)
1158 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1159 if (((c & 0xf8) != 0xf0) ||
1160 ((cur[3] & 0xc0) != 0x80))
1161 goto encoding_error;
1162 /* 4-byte code */
1163 ctxt->input->cur += 4;
1164 val = (cur[0] & 0x7) << 18;
1165 val |= (cur[1] & 0x3f) << 12;
1166 val |= (cur[2] & 0x3f) << 6;
1167 val |= cur[3] & 0x3f;
1168 } else {
1169 /* 3-byte code */
1170 ctxt->input->cur += 3;
1171 val = (cur[0] & 0xf) << 12;
1172 val |= (cur[1] & 0x3f) << 6;
1173 val |= cur[2] & 0x3f;
1174 }
1175 if (((val > 0xd7ff) && (val < 0xe000)) ||
1176 ((val > 0xfffd) && (val < 0x10000)) ||
1177 (val >= 0x110000)) {
1178 if ((ctxt->sax != NULL) &&
1179 (ctxt->sax->error != NULL))
1180 ctxt->sax->error(ctxt->userData,
1181 "Char 0x%X out of allowed range\n", val);
1182 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1183 ctxt->wellFormed = 0;
1184 ctxt->disableSAX = 1;
1185 }
1186 } else
1187 /* 2-byte code */
1188 ctxt->input->cur += 2;
1189 } else
1190 /* 1-byte code */
1191 ctxt->input->cur++;
1192 } else {
1193 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001194 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001195 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001196 * XML constructs only use < 128 chars
1197 */
1198 ctxt->input->cur++;
1199 }
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 } else {
1205 ctxt->input->cur++;
1206 ctxt->nbChars++;
1207 if (*ctxt->input->cur == 0)
1208 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1209 }
1210 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1211 xmlParserHandlePEReference(ctxt);
1212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
1215 return;
1216encoding_error:
1217 /*
1218 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001219 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001220 * declaration header. Report the error and switch the encoding
1221 * to ISO-Latin-1 (if you don't like this policy, just declare the
1222 * encoding !)
1223 */
1224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1225 ctxt->sax->error(ctxt->userData,
1226 "Input is not proper UTF-8, indicate encoding !\n");
1227 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1228 ctxt->input->cur[0], ctxt->input->cur[1],
1229 ctxt->input->cur[2], ctxt->input->cur[3]);
1230 }
1231 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1232
1233 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1234 ctxt->input->cur++;
1235 return;
1236}
1237
1238/**
1239 * xmlCurrentChar:
1240 * @ctxt: the XML parser context
1241 * @len: pointer to the length of the char read
1242 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001243 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001244 * bytes in the input buffer. Implement the end of line normalization:
1245 * 2.11 End-of-Line Handling
1246 * Wherever an external parsed entity or the literal entity value
1247 * of an internal parsed entity contains either the literal two-character
1248 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1249 * must pass to the application the single character #xA.
1250 * This behavior can conveniently be produced by normalizing all
1251 * line breaks to #xA on input, before parsing.)
1252 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001253 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001254 */
1255
1256int
1257xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1258 if (ctxt->instate == XML_PARSER_EOF)
1259 return(0);
1260
1261 if (ctxt->token != 0) {
1262 *len = 0;
1263 return(ctxt->token);
1264 }
1265 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1266 *len = 1;
1267 return((int) *ctxt->input->cur);
1268 }
1269 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1270 /*
1271 * We are supposed to handle UTF8, check it's valid
1272 * From rfc2044: encoding of the Unicode values on UTF-8:
1273 *
1274 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1275 * 0000 0000-0000 007F 0xxxxxxx
1276 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1277 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1278 *
1279 * Check for the 0x110000 limit too
1280 */
1281 const unsigned char *cur = ctxt->input->cur;
1282 unsigned char c;
1283 unsigned int val;
1284
1285 c = *cur;
1286 if (c & 0x80) {
1287 if (cur[1] == 0)
1288 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289 if ((cur[1] & 0xc0) != 0x80)
1290 goto encoding_error;
1291 if ((c & 0xe0) == 0xe0) {
1292
1293 if (cur[2] == 0)
1294 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1295 if ((cur[2] & 0xc0) != 0x80)
1296 goto encoding_error;
1297 if ((c & 0xf0) == 0xf0) {
1298 if (cur[3] == 0)
1299 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1300 if (((c & 0xf8) != 0xf0) ||
1301 ((cur[3] & 0xc0) != 0x80))
1302 goto encoding_error;
1303 /* 4-byte code */
1304 *len = 4;
1305 val = (cur[0] & 0x7) << 18;
1306 val |= (cur[1] & 0x3f) << 12;
1307 val |= (cur[2] & 0x3f) << 6;
1308 val |= cur[3] & 0x3f;
1309 } else {
1310 /* 3-byte code */
1311 *len = 3;
1312 val = (cur[0] & 0xf) << 12;
1313 val |= (cur[1] & 0x3f) << 6;
1314 val |= cur[2] & 0x3f;
1315 }
1316 } else {
1317 /* 2-byte code */
1318 *len = 2;
1319 val = (cur[0] & 0x1f) << 6;
1320 val |= cur[1] & 0x3f;
1321 }
1322 if (!IS_CHAR(val)) {
1323 if ((ctxt->sax != NULL) &&
1324 (ctxt->sax->error != NULL))
1325 ctxt->sax->error(ctxt->userData,
1326 "Char 0x%X out of allowed range\n", val);
1327 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1328 ctxt->wellFormed = 0;
1329 ctxt->disableSAX = 1;
1330 }
1331 return(val);
1332 } else {
1333 /* 1-byte code */
1334 *len = 1;
1335 if (*ctxt->input->cur == 0xD) {
1336 if (ctxt->input->cur[1] == 0xA) {
1337 ctxt->nbChars++;
1338 ctxt->input->cur++;
1339 }
1340 return(0xA);
1341 }
1342 return((int) *ctxt->input->cur);
1343 }
1344 }
1345 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001346 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001347 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001348 * XML constructs only use < 128 chars
1349 */
1350 *len = 1;
1351 if (*ctxt->input->cur == 0xD) {
1352 if (ctxt->input->cur[1] == 0xA) {
1353 ctxt->nbChars++;
1354 ctxt->input->cur++;
1355 }
1356 return(0xA);
1357 }
1358 return((int) *ctxt->input->cur);
1359encoding_error:
1360 /*
1361 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001362 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001363 * declaration header. Report the error and switch the encoding
1364 * to ISO-Latin-1 (if you don't like this policy, just declare the
1365 * encoding !)
1366 */
1367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1368 ctxt->sax->error(ctxt->userData,
1369 "Input is not proper UTF-8, indicate encoding !\n");
1370 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1371 ctxt->input->cur[0], ctxt->input->cur[1],
1372 ctxt->input->cur[2], ctxt->input->cur[3]);
1373 }
1374 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1375
1376 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1377 *len = 1;
1378 return((int) *ctxt->input->cur);
1379}
1380
1381/**
1382 * xmlStringCurrentChar:
1383 * @ctxt: the XML parser context
1384 * @cur: pointer to the beginning of the char
1385 * @len: pointer to the length of the char read
1386 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001387 * The current char value, if using UTF-8 this may actually span multiple
Owen Taylor3473f882001-02-23 17:55:21 +00001388 * bytes in the input buffer.
1389 *
Daniel Veillard60087f32001-10-10 09:45:09 +00001390 * Returns the current char value and its length
Owen Taylor3473f882001-02-23 17:55:21 +00001391 */
1392
1393int
Daniel Veillardd8224e02002-01-13 15:43:22 +00001394xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1395{
Daniel Veillard61d80a22001-04-27 17:13:01 +00001396 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
Daniel Veillardd8224e02002-01-13 15:43:22 +00001397 /*
1398 * We are supposed to handle UTF8, check it's valid
1399 * From rfc2044: encoding of the Unicode values on UTF-8:
1400 *
1401 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1402 * 0000 0000-0000 007F 0xxxxxxx
1403 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1404 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1405 *
1406 * Check for the 0x110000 limit too
1407 */
1408 unsigned char c;
1409 unsigned int val;
Owen Taylor3473f882001-02-23 17:55:21 +00001410
Daniel Veillardd8224e02002-01-13 15:43:22 +00001411 c = *cur;
1412 if (c & 0x80) {
1413 if ((cur[1] & 0xc0) != 0x80)
1414 goto encoding_error;
1415 if ((c & 0xe0) == 0xe0) {
Owen Taylor3473f882001-02-23 17:55:21 +00001416
Daniel Veillardd8224e02002-01-13 15:43:22 +00001417 if ((cur[2] & 0xc0) != 0x80)
1418 goto encoding_error;
1419 if ((c & 0xf0) == 0xf0) {
1420 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1421 goto encoding_error;
1422 /* 4-byte code */
1423 *len = 4;
1424 val = (cur[0] & 0x7) << 18;
1425 val |= (cur[1] & 0x3f) << 12;
1426 val |= (cur[2] & 0x3f) << 6;
1427 val |= cur[3] & 0x3f;
1428 } else {
1429 /* 3-byte code */
1430 *len = 3;
1431 val = (cur[0] & 0xf) << 12;
1432 val |= (cur[1] & 0x3f) << 6;
1433 val |= cur[2] & 0x3f;
1434 }
1435 } else {
1436 /* 2-byte code */
1437 *len = 2;
1438 val = (cur[0] & 0x1f) << 6;
1439 val |= cur[1] & 0x3f;
1440 }
1441 if (!IS_CHAR(val)) {
1442 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1443 (ctxt->sax->error != NULL))
1444 ctxt->sax->error(ctxt->userData,
1445 "Char 0x%X out of allowed range\n",
1446 val);
1447 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1448 ctxt->wellFormed = 0;
1449 ctxt->disableSAX = 1;
1450 }
1451 return (val);
1452 } else {
1453 /* 1-byte code */
1454 *len = 1;
1455 return ((int) *cur);
1456 }
Owen Taylor3473f882001-02-23 17:55:21 +00001457 }
1458 /*
Daniel Veillard60087f32001-10-10 09:45:09 +00001459 * Assume it's a fixed length encoding (1) with
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001460 * a compatible encoding for the ASCII set, since
Owen Taylor3473f882001-02-23 17:55:21 +00001461 * XML constructs only use < 128 chars
1462 */
1463 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001464 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001465encoding_error:
Daniel Veillardd8224e02002-01-13 15:43:22 +00001466
Owen Taylor3473f882001-02-23 17:55:21 +00001467 /*
1468 * If we detect an UTF8 error that probably mean that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001469 * input encoding didn't get properly advertised in the
Owen Taylor3473f882001-02-23 17:55:21 +00001470 * declaration header. Report the error and switch the encoding
1471 * to ISO-Latin-1 (if you don't like this policy, just declare the
1472 * encoding !)
1473 */
Daniel Veillardd8224e02002-01-13 15:43:22 +00001474 if (ctxt != NULL) {
1475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1476 ctxt->sax->error(ctxt->userData,
1477 "Input is not proper UTF-8, indicate encoding !\n");
1478 ctxt->sax->error(ctxt->userData,
1479 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1480 ctxt->input->cur[0], ctxt->input->cur[1],
1481 ctxt->input->cur[2], ctxt->input->cur[3]);
1482 }
1483 ctxt->errNo = XML_ERR_INVALID_ENCODING;
Owen Taylor3473f882001-02-23 17:55:21 +00001484 }
Owen Taylor3473f882001-02-23 17:55:21 +00001485
1486 *len = 1;
Daniel Veillardd8224e02002-01-13 15:43:22 +00001487 return ((int) *cur);
Owen Taylor3473f882001-02-23 17:55:21 +00001488}
1489
1490/**
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001491 * xmlCopyCharMultiByte:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * @out: pointer to an array of xmlChar
Owen Taylor3473f882001-02-23 17:55:21 +00001493 * @val: the char value
1494 *
1495 * append the char value in the array
1496 *
1497 * Returns the number of xmlChar written
1498 */
Owen Taylor3473f882001-02-23 17:55:21 +00001499int
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001500xmlCopyCharMultiByte(xmlChar *out, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * We are supposed to handle UTF8, check it's valid
1503 * From rfc2044: encoding of the Unicode values on UTF-8:
1504 *
1505 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1506 * 0000 0000-0000 007F 0xxxxxxx
1507 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1508 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1509 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001510 if (val >= 0x80) {
1511 xmlChar *savedout = out;
1512 int bits;
1513 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1514 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1515 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1516 else {
Owen Taylor3473f882001-02-23 17:55:21 +00001517 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001519 val);
1520 return(0);
1521 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001522 for ( ; bits >= 0; bits-= 6)
1523 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1524 return (out - savedout);
Owen Taylor3473f882001-02-23 17:55:21 +00001525 }
1526 *out = (xmlChar) val;
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001527 return 1;
1528}
1529
1530/**
1531 * xmlCopyChar:
1532 * @len: Ignored, compatibility
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001533 * @out: pointer to an array of xmlChar
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001534 * @val: the char value
1535 *
1536 * append the char value in the array
1537 *
1538 * Returns the number of xmlChar written
1539 */
1540
1541int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00001542xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001543 /* the len parameter is ignored */
1544 if (val >= 0x80) {
1545 return(xmlCopyCharMultiByte (out, val));
1546 }
1547 *out = (xmlChar) val;
1548 return 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001549}
1550
1551/************************************************************************
1552 * *
1553 * Commodity functions to switch encodings *
1554 * *
1555 ************************************************************************/
1556
1557/**
1558 * xmlSwitchEncoding:
1559 * @ctxt: the parser context
1560 * @enc: the encoding value (number)
1561 *
1562 * change the input functions when discovering the character encoding
1563 * of a given entity.
1564 *
1565 * Returns 0 in case of success, -1 otherwise
1566 */
1567int
1568xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1569{
1570 xmlCharEncodingHandlerPtr handler;
1571
1572 switch (enc) {
1573 case XML_CHAR_ENCODING_ERROR:
1574 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1576 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1577 ctxt->wellFormed = 0;
1578 ctxt->disableSAX = 1;
1579 break;
1580 case XML_CHAR_ENCODING_NONE:
1581 /* let's assume it's UTF-8 without the XML decl */
1582 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1583 return(0);
1584 case XML_CHAR_ENCODING_UTF8:
1585 /* default encoding, no conversion should be needed */
1586 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard87a764e2001-06-20 17:41:10 +00001587
1588 /*
1589 * Errata on XML-1.0 June 20 2001
1590 * Specific handling of the Byte Order Mark for
1591 * UTF-8
1592 */
Daniel Veillard3e5bb8e2001-06-27 16:34:34 +00001593 if ((ctxt->input != NULL) &&
1594 (ctxt->input->cur[0] == 0xEF) &&
Daniel Veillard87a764e2001-06-20 17:41:10 +00001595 (ctxt->input->cur[1] == 0xBB) &&
1596 (ctxt->input->cur[2] == 0xBF)) {
1597 ctxt->input->cur += 3;
1598 }
Owen Taylor3473f882001-02-23 17:55:21 +00001599 return(0);
1600 default:
1601 break;
1602 }
1603 handler = xmlGetCharEncodingHandler(enc);
1604 if (handler == NULL) {
1605 /*
1606 * Default handlers.
1607 */
1608 switch (enc) {
1609 case XML_CHAR_ENCODING_ERROR:
1610 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1613 ctxt->wellFormed = 0;
1614 ctxt->disableSAX = 1;
1615 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1616 break;
1617 case XML_CHAR_ENCODING_NONE:
1618 /* let's assume it's UTF-8 without the XML decl */
1619 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1620 return(0);
1621 case XML_CHAR_ENCODING_UTF8:
1622 case XML_CHAR_ENCODING_ASCII:
1623 /* default encoding, no conversion should be needed */
1624 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1625 return(0);
1626 case XML_CHAR_ENCODING_UTF16LE:
1627 break;
1628 case XML_CHAR_ENCODING_UTF16BE:
1629 break;
1630 case XML_CHAR_ENCODING_UCS4LE:
1631 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633 ctxt->sax->error(ctxt->userData,
1634 "char encoding USC4 little endian not supported\n");
1635 break;
1636 case XML_CHAR_ENCODING_UCS4BE:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding USC4 big endian not supported\n");
1641 break;
1642 case XML_CHAR_ENCODING_EBCDIC:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding EBCDIC not supported\n");
1647 break;
1648 case XML_CHAR_ENCODING_UCS4_2143:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding UCS4 2143 not supported\n");
1653 break;
1654 case XML_CHAR_ENCODING_UCS4_3412:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding UCS4 3412 not supported\n");
1659 break;
1660 case XML_CHAR_ENCODING_UCS2:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding UCS2 not supported\n");
1665 break;
1666 case XML_CHAR_ENCODING_8859_1:
1667 case XML_CHAR_ENCODING_8859_2:
1668 case XML_CHAR_ENCODING_8859_3:
1669 case XML_CHAR_ENCODING_8859_4:
1670 case XML_CHAR_ENCODING_8859_5:
1671 case XML_CHAR_ENCODING_8859_6:
1672 case XML_CHAR_ENCODING_8859_7:
1673 case XML_CHAR_ENCODING_8859_8:
1674 case XML_CHAR_ENCODING_8859_9:
1675 /*
1676 * We used to keep the internal content in the
1677 * document encoding however this turns being unmaintainable
1678 * So xmlGetCharEncodingHandler() will return non-null
1679 * values for this now.
1680 */
1681 if ((ctxt->inputNr == 1) &&
1682 (ctxt->encoding == NULL) &&
1683 (ctxt->input->encoding != NULL)) {
1684 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1685 }
1686 ctxt->charset = enc;
1687 return(0);
1688 case XML_CHAR_ENCODING_2022_JP:
1689 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1691 ctxt->sax->error(ctxt->userData,
1692 "char encoding ISO-2022-JPnot supported\n");
1693 break;
1694 case XML_CHAR_ENCODING_SHIFT_JIS:
1695 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697 ctxt->sax->error(ctxt->userData,
1698 "char encoding Shift_JIS not supported\n");
1699 break;
1700 case XML_CHAR_ENCODING_EUC_JP:
1701 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1703 ctxt->sax->error(ctxt->userData,
1704 "char encoding EUC-JPnot supported\n");
1705 break;
1706 }
1707 }
1708 if (handler == NULL)
1709 return(-1);
1710 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1711 return(xmlSwitchToEncoding(ctxt, handler));
1712}
1713
1714/**
1715 * xmlSwitchToEncoding:
1716 * @ctxt: the parser context
1717 * @handler: the encoding handler
1718 *
1719 * change the input functions when discovering the character encoding
1720 * of a given entity.
1721 *
1722 * Returns 0 in case of success, -1 otherwise
1723 */
1724int
1725xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1726{
1727 int nbchars;
1728
1729 if (handler != NULL) {
1730 if (ctxt->input != NULL) {
1731 if (ctxt->input->buf != NULL) {
1732 if (ctxt->input->buf->encoder != NULL) {
1733 if (ctxt->input->buf->encoder == handler)
1734 return(0);
1735 /*
1736 * Note: this is a bit dangerous, but that's what it
1737 * takes to use nearly compatible signature for different
1738 * encodings.
1739 */
1740 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1741 ctxt->input->buf->encoder = handler;
1742 return(0);
1743 }
1744 ctxt->input->buf->encoder = handler;
1745
1746 /*
1747 * Is there already some content down the pipe to convert ?
1748 */
1749 if ((ctxt->input->buf->buffer != NULL) &&
1750 (ctxt->input->buf->buffer->use > 0)) {
1751 int processed;
1752
1753 /*
1754 * Specific handling of the Byte Order Mark for
1755 * UTF-16
1756 */
1757 if ((handler->name != NULL) &&
1758 (!strcmp(handler->name, "UTF-16LE")) &&
1759 (ctxt->input->cur[0] == 0xFF) &&
1760 (ctxt->input->cur[1] == 0xFE)) {
1761 ctxt->input->cur += 2;
1762 }
1763 if ((handler->name != NULL) &&
1764 (!strcmp(handler->name, "UTF-16BE")) &&
1765 (ctxt->input->cur[0] == 0xFE) &&
1766 (ctxt->input->cur[1] == 0xFF)) {
1767 ctxt->input->cur += 2;
1768 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001769 /*
1770 * Errata on XML-1.0 June 20 2001
1771 * Specific handling of the Byte Order Mark for
1772 * UTF-8
1773 */
1774 if ((handler->name != NULL) &&
1775 (!strcmp(handler->name, "UTF-8")) &&
1776 (ctxt->input->cur[0] == 0xEF) &&
1777 (ctxt->input->cur[1] == 0xBB) &&
Daniel Veillard7dd05702001-10-04 14:25:12 +00001778 (ctxt->input->cur[2] == 0xBF)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001779 ctxt->input->cur += 3;
1780 }
Owen Taylor3473f882001-02-23 17:55:21 +00001781
1782 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001783 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001784 * Move it as the raw buffer and create a new input buffer
1785 */
1786 processed = ctxt->input->cur - ctxt->input->base;
1787 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1788 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1789 ctxt->input->buf->buffer = xmlBufferCreate();
1790
1791 if (ctxt->html) {
1792 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001793 * convert as much as possible of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00001794 */
1795 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1796 ctxt->input->buf->buffer,
1797 ctxt->input->buf->raw);
1798 } else {
1799 /*
1800 * convert just enough to get
1801 * '<?xml version="1.0" encoding="xxx"?>'
1802 * parsed with the autodetected encoding
1803 * into the parser reading buffer.
1804 */
1805 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1806 ctxt->input->buf->buffer,
1807 ctxt->input->buf->raw);
1808 }
1809 if (nbchars < 0) {
1810 xmlGenericError(xmlGenericErrorContext,
1811 "xmlSwitchToEncoding: encoder error\n");
1812 return(-1);
1813 }
1814 ctxt->input->base =
1815 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001816 ctxt->input->end =
1817 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001818
1819 }
1820 return(0);
1821 } else {
1822 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1823 /*
1824 * When parsing a static memory array one must know the
1825 * size to be able to convert the buffer.
1826 */
1827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1828 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001829 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001830 return(-1);
1831 } else {
1832 int processed;
1833
1834 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001835 * Shrink the current input buffer.
Owen Taylor3473f882001-02-23 17:55:21 +00001836 * Move it as the raw buffer and create a new input buffer
1837 */
1838 processed = ctxt->input->cur - ctxt->input->base;
1839
1840 ctxt->input->buf->raw = xmlBufferCreate();
1841 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1842 ctxt->input->length - processed);
1843 ctxt->input->buf->buffer = xmlBufferCreate();
1844
1845 /*
1846 * convert as much as possible of the raw input
1847 * to the parser reading buffer.
1848 */
1849 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1850 ctxt->input->buf->buffer,
1851 ctxt->input->buf->raw);
1852 if (nbchars < 0) {
1853 xmlGenericError(xmlGenericErrorContext,
1854 "xmlSwitchToEncoding: encoder error\n");
1855 return(-1);
1856 }
1857
1858 /*
1859 * Conversion succeeded, get rid of the old buffer
1860 */
1861 if ((ctxt->input->free != NULL) &&
1862 (ctxt->input->base != NULL))
1863 ctxt->input->free((xmlChar *) ctxt->input->base);
1864 ctxt->input->base =
1865 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001866 ctxt->input->end =
1867 &ctxt->input->base[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001868 }
1869 }
1870 } else {
1871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1872 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001873 "xmlSwitchToEncoding : no input\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(-1);
1875 }
1876 /*
1877 * The parsing is now done in UTF8 natively
1878 */
1879 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1880 } else
1881 return(-1);
1882 return(0);
1883
1884}
1885
1886/************************************************************************
1887 * *
1888 * Commodity functions to handle entities processing *
1889 * *
1890 ************************************************************************/
1891
1892/**
1893 * xmlFreeInputStream:
1894 * @input: an xmlParserInputPtr
1895 *
1896 * Free up an input stream.
1897 */
1898void
1899xmlFreeInputStream(xmlParserInputPtr input) {
1900 if (input == NULL) return;
1901
1902 if (input->filename != NULL) xmlFree((char *) input->filename);
1903 if (input->directory != NULL) xmlFree((char *) input->directory);
1904 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1905 if (input->version != NULL) xmlFree((char *) input->version);
1906 if ((input->free != NULL) && (input->base != NULL))
1907 input->free((xmlChar *) input->base);
1908 if (input->buf != NULL)
1909 xmlFreeParserInputBuffer(input->buf);
Owen Taylor3473f882001-02-23 17:55:21 +00001910 xmlFree(input);
1911}
1912
1913/**
1914 * xmlNewInputStream:
1915 * @ctxt: an XML parser context
1916 *
1917 * Create a new input stream structure
1918 * Returns the new input stream or NULL
1919 */
1920xmlParserInputPtr
1921xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1922 xmlParserInputPtr input;
1923
1924 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1925 if (input == NULL) {
1926 if (ctxt != NULL) {
1927 ctxt->errNo = XML_ERR_NO_MEMORY;
1928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1929 ctxt->sax->error(ctxt->userData,
1930 "malloc: couldn't allocate a new input stream\n");
1931 ctxt->errNo = XML_ERR_NO_MEMORY;
1932 }
1933 return(NULL);
1934 }
1935 memset(input, 0, sizeof(xmlParserInput));
1936 input->line = 1;
1937 input->col = 1;
1938 input->standalone = -1;
1939 return(input);
1940}
1941
1942/**
1943 * xmlNewIOInputStream:
1944 * @ctxt: an XML parser context
1945 * @input: an I/O Input
1946 * @enc: the charset encoding if known
1947 *
1948 * Create a new input stream structure encapsulating the @input into
1949 * a stream suitable for the parser.
1950 *
1951 * Returns the new input stream or NULL
1952 */
1953xmlParserInputPtr
1954xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1955 xmlCharEncoding enc) {
1956 xmlParserInputPtr inputStream;
1957
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1960 inputStream = xmlNewInputStream(ctxt);
1961 if (inputStream == NULL) {
1962 return(NULL);
1963 }
1964 inputStream->filename = NULL;
1965 inputStream->buf = input;
1966 inputStream->base = inputStream->buf->buffer->content;
1967 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001968 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00001969 if (enc != XML_CHAR_ENCODING_NONE) {
1970 xmlSwitchEncoding(ctxt, enc);
1971 }
1972
1973 return(inputStream);
1974}
1975
1976/**
1977 * xmlNewEntityInputStream:
1978 * @ctxt: an XML parser context
1979 * @entity: an Entity pointer
1980 *
1981 * Create a new input stream based on an xmlEntityPtr
1982 *
1983 * Returns the new input stream or NULL
1984 */
1985xmlParserInputPtr
1986xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1987 xmlParserInputPtr input;
1988
1989 if (entity == NULL) {
1990 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1992 ctxt->sax->error(ctxt->userData,
1993 "internal: xmlNewEntityInputStream entity = NULL\n");
1994 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1995 return(NULL);
1996 }
1997 if (xmlParserDebugEntities)
1998 xmlGenericError(xmlGenericErrorContext,
1999 "new input from entity: %s\n", entity->name);
2000 if (entity->content == NULL) {
2001 switch (entity->etype) {
2002 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2003 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "xmlNewEntityInputStream unparsed entity !\n");
2007 break;
2008 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2009 case XML_EXTERNAL_PARAMETER_ENTITY:
2010 return(xmlLoadExternalEntity((char *) entity->URI,
2011 (char *) entity->ExternalID, ctxt));
2012 case XML_INTERNAL_GENERAL_ENTITY:
2013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2014 ctxt->sax->error(ctxt->userData,
2015 "Internal entity %s without content !\n", entity->name);
2016 break;
2017 case XML_INTERNAL_PARAMETER_ENTITY:
2018 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData,
2021 "Internal parameter entity %s without content !\n", entity->name);
2022 break;
2023 case XML_INTERNAL_PREDEFINED_ENTITY:
2024 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2026 ctxt->sax->error(ctxt->userData,
2027 "Predefined entity %s without content !\n", entity->name);
2028 break;
2029 }
2030 return(NULL);
2031 }
2032 input = xmlNewInputStream(ctxt);
2033 if (input == NULL) {
2034 return(NULL);
2035 }
2036 input->filename = (char *) entity->URI;
2037 input->base = entity->content;
2038 input->cur = entity->content;
2039 input->length = entity->length;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002040 input->end = &entity->content[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002041 return(input);
2042}
2043
2044/**
2045 * xmlNewStringInputStream:
2046 * @ctxt: an XML parser context
2047 * @buffer: an memory buffer
2048 *
2049 * Create a new input stream based on a memory buffer.
2050 * Returns the new input stream
2051 */
2052xmlParserInputPtr
2053xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2054 xmlParserInputPtr input;
2055
2056 if (buffer == NULL) {
2057 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2059 ctxt->sax->error(ctxt->userData,
2060 "internal: xmlNewStringInputStream string = NULL\n");
2061 return(NULL);
2062 }
2063 if (xmlParserDebugEntities)
2064 xmlGenericError(xmlGenericErrorContext,
2065 "new fixed input: %.30s\n", buffer);
2066 input = xmlNewInputStream(ctxt);
2067 if (input == NULL) {
2068 return(NULL);
2069 }
2070 input->base = buffer;
2071 input->cur = buffer;
2072 input->length = xmlStrlen(buffer);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002073 input->end = &buffer[input->length];
Owen Taylor3473f882001-02-23 17:55:21 +00002074 return(input);
2075}
2076
2077/**
2078 * xmlNewInputFromFile:
2079 * @ctxt: an XML parser context
2080 * @filename: the filename to use as entity
2081 *
2082 * Create a new input stream based on a file.
2083 *
2084 * Returns the new input stream or NULL in case of error
2085 */
2086xmlParserInputPtr
2087xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2088 xmlParserInputBufferPtr buf;
2089 xmlParserInputPtr inputStream;
2090 char *directory = NULL;
2091 xmlChar *URI = NULL;
2092
2093 if (xmlParserDebugEntities)
2094 xmlGenericError(xmlGenericErrorContext,
2095 "new input from file: %s\n", filename);
2096 if (ctxt == NULL) return(NULL);
2097 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2098 if (buf == NULL)
2099 return(NULL);
2100
2101 URI = xmlStrdup((xmlChar *) filename);
2102 directory = xmlParserGetDirectory((const char *) URI);
2103
2104 inputStream = xmlNewInputStream(ctxt);
2105 if (inputStream == NULL) {
2106 if (directory != NULL) xmlFree((char *) directory);
2107 if (URI != NULL) xmlFree((char *) URI);
2108 return(NULL);
2109 }
2110
2111 inputStream->filename = (const char *) URI;
2112 inputStream->directory = directory;
2113 inputStream->buf = buf;
2114
2115 inputStream->base = inputStream->buf->buffer->content;
2116 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002117 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00002118 if ((ctxt->directory == NULL) && (directory != NULL))
2119 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2120 return(inputStream);
2121}
2122
2123/************************************************************************
2124 * *
2125 * Commodity functions to handle parser contexts *
2126 * *
2127 ************************************************************************/
2128
2129/**
2130 * xmlInitParserCtxt:
2131 * @ctxt: an XML parser context
2132 *
2133 * Initialize a parser context
2134 */
2135
2136void
2137xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2138{
2139 xmlSAXHandler *sax;
2140
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002141 if(ctxt==NULL) {
2142 xmlGenericError(xmlGenericErrorContext,
2143 "xmlInitParserCtxt: NULL context given\n");
2144 return;
2145 }
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 xmlDefaultSAXHandlerInit();
2148
2149 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2150 if (sax == NULL) {
2151 xmlGenericError(xmlGenericErrorContext,
2152 "xmlInitParserCtxt: out of memory\n");
2153 }
2154 else
2155 memset(sax, 0, sizeof(xmlSAXHandler));
2156
2157 /* Allocate the Input stack */
2158 ctxt->inputTab = (xmlParserInputPtr *)
2159 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2160 if (ctxt->inputTab == NULL) {
2161 xmlGenericError(xmlGenericErrorContext,
2162 "xmlInitParserCtxt: out of memory\n");
2163 ctxt->inputNr = 0;
2164 ctxt->inputMax = 0;
2165 ctxt->input = NULL;
2166 return;
2167 }
2168 ctxt->inputNr = 0;
2169 ctxt->inputMax = 5;
2170 ctxt->input = NULL;
2171
2172 ctxt->version = NULL;
2173 ctxt->encoding = NULL;
2174 ctxt->standalone = -1;
2175 ctxt->hasExternalSubset = 0;
2176 ctxt->hasPErefs = 0;
2177 ctxt->html = 0;
2178 ctxt->external = 0;
2179 ctxt->instate = XML_PARSER_START;
2180 ctxt->token = 0;
2181 ctxt->directory = NULL;
2182
2183 /* Allocate the Node stack */
2184 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2185 if (ctxt->nodeTab == NULL) {
2186 xmlGenericError(xmlGenericErrorContext,
2187 "xmlInitParserCtxt: out of memory\n");
2188 ctxt->nodeNr = 0;
2189 ctxt->nodeMax = 0;
2190 ctxt->node = NULL;
2191 ctxt->inputNr = 0;
2192 ctxt->inputMax = 0;
2193 ctxt->input = NULL;
2194 return;
2195 }
2196 ctxt->nodeNr = 0;
2197 ctxt->nodeMax = 10;
2198 ctxt->node = NULL;
2199
2200 /* Allocate the Name stack */
2201 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2202 if (ctxt->nameTab == NULL) {
2203 xmlGenericError(xmlGenericErrorContext,
2204 "xmlInitParserCtxt: out of memory\n");
2205 ctxt->nodeNr = 0;
2206 ctxt->nodeMax = 0;
2207 ctxt->node = NULL;
2208 ctxt->inputNr = 0;
2209 ctxt->inputMax = 0;
2210 ctxt->input = NULL;
2211 ctxt->nameNr = 0;
2212 ctxt->nameMax = 0;
2213 ctxt->name = NULL;
2214 return;
2215 }
2216 ctxt->nameNr = 0;
2217 ctxt->nameMax = 10;
2218 ctxt->name = NULL;
2219
2220 /* Allocate the space stack */
2221 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2222 if (ctxt->spaceTab == NULL) {
2223 xmlGenericError(xmlGenericErrorContext,
2224 "xmlInitParserCtxt: out of memory\n");
2225 ctxt->nodeNr = 0;
2226 ctxt->nodeMax = 0;
2227 ctxt->node = NULL;
2228 ctxt->inputNr = 0;
2229 ctxt->inputMax = 0;
2230 ctxt->input = NULL;
2231 ctxt->nameNr = 0;
2232 ctxt->nameMax = 0;
2233 ctxt->name = NULL;
2234 ctxt->spaceNr = 0;
2235 ctxt->spaceMax = 0;
2236 ctxt->space = NULL;
2237 return;
2238 }
2239 ctxt->spaceNr = 1;
2240 ctxt->spaceMax = 10;
2241 ctxt->spaceTab[0] = -1;
2242 ctxt->space = &ctxt->spaceTab[0];
2243
Daniel Veillard14be0a12001-03-03 18:50:55 +00002244 ctxt->sax = sax;
Daniel Veillard3c01b1d2001-10-17 15:58:35 +00002245 initxmlDefaultSAXHandler(sax, xmlGetWarningsDefaultValue);
Daniel Veillard14be0a12001-03-03 18:50:55 +00002246
Owen Taylor3473f882001-02-23 17:55:21 +00002247 ctxt->userData = ctxt;
2248 ctxt->myDoc = NULL;
2249 ctxt->wellFormed = 1;
2250 ctxt->valid = 1;
2251 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2252 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2253 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillarda53c6882001-07-25 17:18:57 +00002254 ctxt->linenumbers = xmlLineNumbersDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +00002255 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillard16698282001-09-14 10:29:27 +00002256 if (ctxt->keepBlanks == 0)
2257 sax->ignorableWhitespace = ignorableWhitespace;
2258
Owen Taylor3473f882001-02-23 17:55:21 +00002259 ctxt->vctxt.userData = ctxt;
2260 if (ctxt->validate) {
2261 ctxt->vctxt.error = xmlParserValidityError;
2262 if (xmlGetWarningsDefaultValue == 0)
2263 ctxt->vctxt.warning = NULL;
2264 else
2265 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard34b1b3a2001-04-21 14:16:10 +00002266 ctxt->vctxt.nodeMax = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 } else {
2268 ctxt->vctxt.error = NULL;
2269 ctxt->vctxt.warning = NULL;
2270 }
2271 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2272 ctxt->record_info = 0;
2273 ctxt->nbChars = 0;
2274 ctxt->checkIndex = 0;
2275 ctxt->inSubset = 0;
2276 ctxt->errNo = XML_ERR_OK;
2277 ctxt->depth = 0;
2278 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002279 ctxt->catalogs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002280 xmlInitNodeInfoSeq(&ctxt->node_seq);
2281}
2282
2283/**
2284 * xmlFreeParserCtxt:
2285 * @ctxt: an XML parser context
2286 *
2287 * Free all the memory used by a parser context. However the parsed
2288 * document in ctxt->myDoc is not freed.
2289 */
2290
2291void
2292xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2293{
2294 xmlParserInputPtr input;
2295 xmlChar *oldname;
2296
2297 if (ctxt == NULL) return;
2298
2299 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2300 xmlFreeInputStream(input);
2301 }
2302 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2303 xmlFree(oldname);
2304 }
2305 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2306 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2307 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2308 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2309 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2310 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2311 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2312 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2313 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
Owen Taylor3473f882001-02-23 17:55:21 +00002314 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2315 xmlFree(ctxt->sax);
2316 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
Daniel Veillarda9142e72001-06-19 11:07:54 +00002317 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002318#ifdef LIBXML_CATALOG_ENABLED
2319 if (ctxt->catalogs != NULL)
2320 xmlCatalogFreeLocal(ctxt->catalogs);
2321#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002322 xmlFree(ctxt);
2323}
2324
2325/**
2326 * xmlNewParserCtxt:
2327 *
2328 * Allocate and initialize a new parser context.
2329 *
2330 * Returns the xmlParserCtxtPtr or NULL
2331 */
2332
2333xmlParserCtxtPtr
2334xmlNewParserCtxt()
2335{
2336 xmlParserCtxtPtr ctxt;
2337
2338 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2339 if (ctxt == NULL) {
2340 xmlGenericError(xmlGenericErrorContext,
2341 "xmlNewParserCtxt : cannot allocate context\n");
2342 perror("malloc");
2343 return(NULL);
2344 }
2345 memset(ctxt, 0, sizeof(xmlParserCtxt));
2346 xmlInitParserCtxt(ctxt);
2347 return(ctxt);
2348}
2349
2350/************************************************************************
2351 * *
2352 * Handling of node informations *
2353 * *
2354 ************************************************************************/
2355
2356/**
2357 * xmlClearParserCtxt:
2358 * @ctxt: an XML parser context
2359 *
2360 * Clear (release owned resources) and reinitialize a parser context
2361 */
2362
2363void
2364xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2365{
Daniel Veillard5d96fff2001-08-31 14:55:30 +00002366 if (ctxt==NULL)
2367 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 xmlClearNodeInfoSeq(&ctxt->node_seq);
2369 xmlInitParserCtxt(ctxt);
2370}
2371
2372/**
2373 * xmlParserFindNodeInfo:
2374 * @ctxt: an XML parser context
2375 * @node: an XML node within the tree
2376 *
2377 * Find the parser node info struct for a given node
2378 *
2379 * Returns an xmlParserNodeInfo block pointer or NULL
2380 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002381const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2382 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002383{
2384 unsigned long pos;
2385
2386 /* Find position where node should be at */
2387 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
Daniel Veillardb1d62872001-09-21 09:47:08 +00002388 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
Owen Taylor3473f882001-02-23 17:55:21 +00002389 return &ctx->node_seq.buffer[pos];
2390 else
2391 return NULL;
2392}
2393
2394
2395/**
2396 * xmlInitNodeInfoSeq:
2397 * @seq: a node info sequence pointer
2398 *
2399 * -- Initialize (set to initial state) node info sequence
2400 */
2401void
2402xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2403{
2404 seq->length = 0;
2405 seq->maximum = 0;
2406 seq->buffer = NULL;
2407}
2408
2409/**
2410 * xmlClearNodeInfoSeq:
2411 * @seq: a node info sequence pointer
2412 *
2413 * -- Clear (release memory and reinitialize) node
2414 * info sequence
2415 */
2416void
2417xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2418{
2419 if ( seq->buffer != NULL )
2420 xmlFree(seq->buffer);
2421 xmlInitNodeInfoSeq(seq);
2422}
2423
2424
2425/**
2426 * xmlParserFindNodeInfoIndex:
2427 * @seq: a node info sequence pointer
2428 * @node: an XML node pointer
2429 *
2430 *
2431 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2432 * the given node is or should be at in a sorted sequence
2433 *
2434 * Returns a long indicating the position of the record
2435 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002436unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2437 const xmlNodePtr node)
Owen Taylor3473f882001-02-23 17:55:21 +00002438{
2439 unsigned long upper, lower, middle;
2440 int found = 0;
2441
2442 /* Do a binary search for the key */
2443 lower = 1;
2444 upper = seq->length;
2445 middle = 0;
2446 while ( lower <= upper && !found) {
2447 middle = lower + (upper - lower) / 2;
2448 if ( node == seq->buffer[middle - 1].node )
2449 found = 1;
2450 else if ( node < seq->buffer[middle - 1].node )
2451 upper = middle - 1;
2452 else
2453 lower = middle + 1;
2454 }
2455
2456 /* Return position */
2457 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2458 return middle;
2459 else
2460 return middle - 1;
2461}
2462
2463
2464/**
2465 * xmlParserAddNodeInfo:
2466 * @ctxt: an XML parser context
2467 * @info: a node info sequence pointer
2468 *
2469 * Insert node info record into the sorted sequence
2470 */
2471void
2472xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002473 const xmlParserNodeInfoPtr info)
Owen Taylor3473f882001-02-23 17:55:21 +00002474{
2475 unsigned long pos;
2476 static unsigned int block_size = 5;
2477
2478 /* Find pos and check to see if node is already in the sequence */
Daniel Veillard963d2ae2002-01-20 22:08:18 +00002479 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2480 info->node);
Owen Taylor3473f882001-02-23 17:55:21 +00002481 if ( pos < ctxt->node_seq.length
2482 && ctxt->node_seq.buffer[pos].node == info->node ) {
2483 ctxt->node_seq.buffer[pos] = *info;
2484 }
2485
2486 /* Otherwise, we need to add new node to buffer */
2487 else {
2488 /* Expand buffer by 5 if needed */
2489 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2490 xmlParserNodeInfo* tmp_buffer;
2491 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2492 *(ctxt->node_seq.maximum + block_size));
2493
2494 if ( ctxt->node_seq.buffer == NULL )
2495 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2496 else
2497 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2498
2499 if ( tmp_buffer == NULL ) {
2500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2501 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2502 ctxt->errNo = XML_ERR_NO_MEMORY;
2503 return;
2504 }
2505 ctxt->node_seq.buffer = tmp_buffer;
2506 ctxt->node_seq.maximum += block_size;
2507 }
2508
2509 /* If position is not at end, move elements out of the way */
2510 if ( pos != ctxt->node_seq.length ) {
2511 unsigned long i;
2512
2513 for ( i = ctxt->node_seq.length; i > pos; i-- )
2514 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2515 }
2516
2517 /* Copy element and increase length */
2518 ctxt->node_seq.buffer[pos] = *info;
2519 ctxt->node_seq.length++;
2520 }
2521}
2522
2523/************************************************************************
2524 * *
Daniel Veillarda53c6882001-07-25 17:18:57 +00002525 * Defaults settings *
2526 * *
2527 ************************************************************************/
2528/**
2529 * xmlPedanticParserDefault:
2530 * @val: int 0 or 1
2531 *
2532 * Set and return the previous value for enabling pedantic warnings.
2533 *
2534 * Returns the last value for 0 for no substitution, 1 for substitution.
2535 */
2536
2537int
2538xmlPedanticParserDefault(int val) {
2539 int old = xmlPedanticParserDefaultValue;
2540
2541 xmlPedanticParserDefaultValue = val;
2542 return(old);
2543}
2544
2545/**
2546 * xmlLineNumbersDefault:
2547 * @val: int 0 or 1
2548 *
2549 * Set and return the previous value for enabling line numbers in elements
2550 * contents. This may break on old application and is turned off by default.
2551 *
2552 * Returns the last value for 0 for no substitution, 1 for substitution.
2553 */
2554
2555int
2556xmlLineNumbersDefault(int val) {
2557 int old = xmlLineNumbersDefaultValue;
2558
2559 xmlLineNumbersDefaultValue = val;
2560 return(old);
2561}
2562
2563/**
2564 * xmlSubstituteEntitiesDefault:
2565 * @val: int 0 or 1
2566 *
2567 * Set and return the previous value for default entity support.
2568 * Initially the parser always keep entity references instead of substituting
2569 * entity values in the output. This function has to be used to change the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002570 * default parser behavior
2571 * SAX::substituteEntities() has to be used for changing that on a file by
Daniel Veillarda53c6882001-07-25 17:18:57 +00002572 * file basis.
2573 *
2574 * Returns the last value for 0 for no substitution, 1 for substitution.
2575 */
2576
2577int
2578xmlSubstituteEntitiesDefault(int val) {
2579 int old = xmlSubstituteEntitiesDefaultValue;
2580
2581 xmlSubstituteEntitiesDefaultValue = val;
2582 return(old);
2583}
2584
2585/**
2586 * xmlKeepBlanksDefault:
2587 * @val: int 0 or 1
2588 *
2589 * Set and return the previous value for default blanks text nodes support.
2590 * The 1.x version of the parser used an heuristic to try to detect
2591 * ignorable white spaces. As a result the SAX callback was generating
2592 * ignorableWhitespace() callbacks instead of characters() one, and when
2593 * using the DOM output text nodes containing those blanks were not generated.
2594 * The 2.x and later version will switch to the XML standard way and
2595 * ignorableWhitespace() are only generated when running the parser in
2596 * validating mode and when the current element doesn't allow CDATA or
2597 * mixed content.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002598 * This function is provided as a way to force the standard behavior
Daniel Veillarda53c6882001-07-25 17:18:57 +00002599 * on 1.X libs and to switch back to the old mode for compatibility when
2600 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2601 * by using xmlIsBlankNode() commodity function to detect the "empty"
2602 * nodes generated.
2603 * This value also affect autogeneration of indentation when saving code
2604 * if blanks sections are kept, indentation is not generated.
2605 *
2606 * Returns the last value for 0 for no substitution, 1 for substitution.
2607 */
2608
2609int
2610xmlKeepBlanksDefault(int val) {
2611 int old = xmlKeepBlanksDefaultValue;
2612
2613 xmlKeepBlanksDefaultValue = val;
2614 xmlIndentTreeOutput = !val;
2615 return(old);
2616}
2617
2618/************************************************************************
2619 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002620 * Deprecated functions kept for compatibility *
2621 * *
2622 ************************************************************************/
2623
Daniel Veillard5e2dace2001-07-18 19:30:27 +00002624/**
2625 * xmlCheckLanguageID:
Owen Taylor3473f882001-02-23 17:55:21 +00002626 * @lang: pointer to the string value
2627 *
2628 * Checks that the value conforms to the LanguageID production:
2629 *
2630 * NOTE: this is somewhat deprecated, those productions were removed from
2631 * the XML Second edition.
2632 *
2633 * [33] LanguageID ::= Langcode ('-' Subcode)*
2634 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2635 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2636 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2637 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2638 * [38] Subcode ::= ([a-z] | [A-Z])+
2639 *
2640 * Returns 1 if correct 0 otherwise
2641 **/
2642int
2643xmlCheckLanguageID(const xmlChar *lang) {
2644 const xmlChar *cur = lang;
2645
2646 if (cur == NULL)
2647 return(0);
2648 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2649 ((cur[0] == 'I') && (cur[1] == '-'))) {
2650 /*
2651 * IANA code
2652 */
2653 cur += 2;
2654 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2655 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2656 cur++;
2657 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2658 ((cur[0] == 'X') && (cur[1] == '-'))) {
2659 /*
2660 * User code
2661 */
2662 cur += 2;
2663 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2664 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2665 cur++;
2666 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2667 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2668 /*
2669 * ISO639
2670 */
2671 cur++;
2672 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2673 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2674 cur++;
2675 else
2676 return(0);
2677 } else
2678 return(0);
2679 while (cur[0] != 0) { /* non input consuming */
2680 if (cur[0] != '-')
2681 return(0);
2682 cur++;
2683 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2684 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2685 cur++;
2686 else
2687 return(0);
2688 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2689 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2690 cur++;
2691 }
2692 return(1);
2693}
2694
2695/**
2696 * xmlDecodeEntities:
2697 * @ctxt: the parser context
2698 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2699 * @len: the len to decode (in bytes !), -1 for no size limit
2700 * @end: an end marker xmlChar, 0 if none
2701 * @end2: an end marker xmlChar, 0 if none
2702 * @end3: an end marker xmlChar, 0 if none
2703 *
2704 * This function is deprecated, we now always process entities content
2705 * through xmlStringDecodeEntities
2706 *
2707 * TODO: remove it in next major release.
2708 *
2709 * [67] Reference ::= EntityRef | CharRef
2710 *
2711 * [69] PEReference ::= '%' Name ';'
2712 *
2713 * Returns A newly allocated string with the substitution done. The caller
2714 * must deallocate it !
2715 */
2716xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002717xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2718 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002719#if 0
2720 xmlChar *buffer = NULL;
2721 unsigned int buffer_size = 0;
2722 unsigned int nbchars = 0;
2723
2724 xmlChar *current = NULL;
2725 xmlEntityPtr ent;
2726 unsigned int max = (unsigned int) len;
2727 int c,l;
2728#endif
2729
2730 static int deprecated = 0;
2731 if (!deprecated) {
2732 xmlGenericError(xmlGenericErrorContext,
2733 "xmlDecodeEntities() deprecated function reached\n");
2734 deprecated = 1;
2735 }
2736
2737#if 0
2738 if (ctxt->depth > 40) {
2739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2740 ctxt->sax->error(ctxt->userData,
2741 "Detected entity reference loop\n");
2742 ctxt->wellFormed = 0;
2743 ctxt->disableSAX = 1;
2744 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2745 return(NULL);
2746 }
2747
2748 /*
2749 * allocate a translation buffer.
2750 */
2751 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2752 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2753 if (buffer == NULL) {
2754 perror("xmlDecodeEntities: malloc failed");
2755 return(NULL);
2756 }
2757
2758 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002759 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002760 */
2761 GROW;
2762 c = CUR_CHAR(l);
2763 while ((nbchars < max) && (c != end) && /* NOTUSED */
2764 (c != end2) && (c != end3)) {
2765 GROW;
2766 if (c == 0) break;
2767 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2768 int val = xmlParseCharRef(ctxt);
2769 COPY_BUF(0,buffer,nbchars,val);
2770 NEXTL(l);
2771 } else if ((c == '&') && (ctxt->token != '&') &&
2772 (what & XML_SUBSTITUTE_REF)) {
2773 if (xmlParserDebugEntities)
2774 xmlGenericError(xmlGenericErrorContext,
2775 "decoding Entity Reference\n");
2776 ent = xmlParseEntityRef(ctxt);
2777 if ((ent != NULL) &&
2778 (ctxt->replaceEntities != 0)) {
2779 current = ent->content;
2780 while (*current != 0) { /* non input consuming loop */
2781 buffer[nbchars++] = *current++;
2782 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2783 growBuffer(buffer);
2784 }
2785 }
2786 } else if (ent != NULL) {
2787 const xmlChar *cur = ent->name;
2788
2789 buffer[nbchars++] = '&';
2790 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2791 growBuffer(buffer);
2792 }
2793 while (*cur != 0) { /* non input consuming loop */
2794 buffer[nbchars++] = *cur++;
2795 }
2796 buffer[nbchars++] = ';';
2797 }
2798 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2799 /*
2800 * a PEReference induce to switch the entity flow,
2801 * we break here to flush the current set of chars
2802 * parsed if any. We will be called back later.
2803 */
2804 if (xmlParserDebugEntities)
2805 xmlGenericError(xmlGenericErrorContext,
2806 "decoding PE Reference\n");
2807 if (nbchars != 0) break;
2808
2809 xmlParsePEReference(ctxt);
2810
2811 /*
2812 * Pop-up of finished entities.
2813 */
2814 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2815 xmlPopInput(ctxt);
2816
2817 break;
2818 } else {
2819 COPY_BUF(l,buffer,nbchars,c);
2820 NEXTL(l);
2821 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2822 growBuffer(buffer);
2823 }
2824 }
2825 c = CUR_CHAR(l);
2826 }
2827 buffer[nbchars++] = 0;
2828 return(buffer);
2829#endif
2830 return(NULL);
2831}
2832
2833/**
2834 * xmlNamespaceParseNCName:
2835 * @ctxt: an XML parser context
2836 *
2837 * parse an XML namespace name.
2838 *
2839 * TODO: this seems not in use anymore, the namespace handling is done on
2840 * top of the SAX interfaces, i.e. not on raw input.
2841 *
2842 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2843 *
2844 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2845 * CombiningChar | Extender
2846 *
2847 * Returns the namespace name or NULL
2848 */
2849
2850xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002851xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002852#if 0
2853 xmlChar buf[XML_MAX_NAMELEN + 5];
2854 int len = 0, l;
2855 int cur = CUR_CHAR(l);
2856#endif
2857
2858 static int deprecated = 0;
2859 if (!deprecated) {
2860 xmlGenericError(xmlGenericErrorContext,
2861 "xmlNamespaceParseNCName() deprecated function reached\n");
2862 deprecated = 1;
2863 }
2864
2865#if 0
2866 /* load first the value of the char !!! */
2867 GROW;
2868 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2869
2870xmlGenericError(xmlGenericErrorContext,
2871 "xmlNamespaceParseNCName: reached loop 3\n");
2872 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2873 (cur == '.') || (cur == '-') ||
2874 (cur == '_') ||
2875 (IS_COMBINING(cur)) ||
2876 (IS_EXTENDER(cur))) {
2877 COPY_BUF(l,buf,len,cur);
2878 NEXTL(l);
2879 cur = CUR_CHAR(l);
2880 if (len >= XML_MAX_NAMELEN) {
2881 xmlGenericError(xmlGenericErrorContext,
2882 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2883 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2884 (cur == '.') || (cur == '-') ||
2885 (cur == '_') ||
2886 (IS_COMBINING(cur)) ||
2887 (IS_EXTENDER(cur))) {
2888 NEXTL(l);
2889 cur = CUR_CHAR(l);
2890 }
2891 break;
2892 }
2893 }
2894 return(xmlStrndup(buf, len));
2895#endif
2896 return(NULL);
2897}
2898
2899/**
2900 * xmlNamespaceParseQName:
2901 * @ctxt: an XML parser context
2902 * @prefix: a xmlChar **
2903 *
2904 * TODO: this seems not in use anymore, the namespace handling is done on
2905 * top of the SAX interfaces, i.e. not on raw input.
2906 *
2907 * parse an XML qualified name
2908 *
2909 * [NS 5] QName ::= (Prefix ':')? LocalPart
2910 *
2911 * [NS 6] Prefix ::= NCName
2912 *
2913 * [NS 7] LocalPart ::= NCName
2914 *
2915 * Returns the local part, and prefix is updated
2916 * to get the Prefix if any.
2917 */
2918
2919xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002920xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002921
2922 static int deprecated = 0;
2923 if (!deprecated) {
2924 xmlGenericError(xmlGenericErrorContext,
2925 "xmlNamespaceParseQName() deprecated function reached\n");
2926 deprecated = 1;
2927 }
2928
2929#if 0
2930 xmlChar *ret = NULL;
2931
2932 *prefix = NULL;
2933 ret = xmlNamespaceParseNCName(ctxt);
2934 if (RAW == ':') {
2935 *prefix = ret;
2936 NEXT;
2937 ret = xmlNamespaceParseNCName(ctxt);
2938 }
2939
2940 return(ret);
2941#endif
2942 return(NULL);
2943}
2944
2945/**
2946 * xmlNamespaceParseNSDef:
2947 * @ctxt: an XML parser context
2948 *
2949 * parse a namespace prefix declaration
2950 *
2951 * TODO: this seems not in use anymore, the namespace handling is done on
2952 * top of the SAX interfaces, i.e. not on raw input.
2953 *
2954 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2955 *
2956 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2957 *
2958 * Returns the namespace name
2959 */
2960
2961xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002962xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002963 static int deprecated = 0;
2964 if (!deprecated) {
2965 xmlGenericError(xmlGenericErrorContext,
2966 "xmlNamespaceParseNSDef() deprecated function reached\n");
2967 deprecated = 1;
2968 }
2969 return(NULL);
2970#if 0
2971 xmlChar *name = NULL;
2972
2973 if ((RAW == 'x') && (NXT(1) == 'm') &&
2974 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2975 (NXT(4) == 's')) {
2976 SKIP(5);
2977 if (RAW == ':') {
2978 NEXT;
2979 name = xmlNamespaceParseNCName(ctxt);
2980 }
2981 }
2982 return(name);
2983#endif
2984}
2985
2986/**
2987 * xmlParseQuotedString:
2988 * @ctxt: an XML parser context
2989 *
2990 * Parse and return a string between quotes or doublequotes
2991 *
2992 * TODO: Deprecated, to be removed at next drop of binary compatibility
2993 *
2994 * Returns the string parser or NULL.
2995 */
2996xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00002997xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00002998 static int deprecated = 0;
2999 if (!deprecated) {
3000 xmlGenericError(xmlGenericErrorContext,
3001 "xmlParseQuotedString() deprecated function reached\n");
3002 deprecated = 1;
3003 }
3004 return(NULL);
3005
3006#if 0
3007 xmlChar *buf = NULL;
3008 int len = 0,l;
3009 int size = XML_PARSER_BUFFER_SIZE;
3010 int c;
3011
3012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3013 if (buf == NULL) {
3014 xmlGenericError(xmlGenericErrorContext,
3015 "malloc of %d byte failed\n", size);
3016 return(NULL);
3017 }
3018xmlGenericError(xmlGenericErrorContext,
3019 "xmlParseQuotedString: reached loop 4\n");
3020 if (RAW == '"') {
3021 NEXT;
3022 c = CUR_CHAR(l);
3023 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
3024 if (len + 5 >= size) {
3025 size *= 2;
3026 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3027 if (buf == NULL) {
3028 xmlGenericError(xmlGenericErrorContext,
3029 "realloc of %d byte failed\n", size);
3030 return(NULL);
3031 }
3032 }
3033 COPY_BUF(l,buf,len,c);
3034 NEXTL(l);
3035 c = CUR_CHAR(l);
3036 }
3037 if (c != '"') {
3038 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3040 ctxt->sax->error(ctxt->userData,
3041 "String not closed \"%.50s\"\n", buf);
3042 ctxt->wellFormed = 0;
3043 ctxt->disableSAX = 1;
3044 } else {
3045 NEXT;
3046 }
3047 } else if (RAW == '\''){
3048 NEXT;
3049 c = CUR;
3050 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
3051 if (len + 1 >= size) {
3052 size *= 2;
3053 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3054 if (buf == NULL) {
3055 xmlGenericError(xmlGenericErrorContext,
3056 "realloc of %d byte failed\n", size);
3057 return(NULL);
3058 }
3059 }
3060 buf[len++] = c;
3061 NEXT;
3062 c = CUR;
3063 }
3064 if (RAW != '\'') {
3065 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068 "String not closed \"%.50s\"\n", buf);
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 } else {
3072 NEXT;
3073 }
3074 }
3075 return(buf);
3076#endif
3077}
3078
3079/**
3080 * xmlParseNamespace:
3081 * @ctxt: an XML parser context
3082 *
3083 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3084 *
3085 * This is what the older xml-name Working Draft specified, a bunch of
3086 * other stuff may still rely on it, so support is still here as
3087 * if it was declared on the root of the Tree:-(
3088 *
3089 * TODO: remove from library
3090 *
3091 * To be removed at next drop of binary compatibility
3092 */
3093
3094void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003095xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003096 static int deprecated = 0;
3097 if (!deprecated) {
3098 xmlGenericError(xmlGenericErrorContext,
3099 "xmlParseNamespace() deprecated function reached\n");
3100 deprecated = 1;
3101 }
3102
3103#if 0
3104 xmlChar *href = NULL;
3105 xmlChar *prefix = NULL;
3106 int garbage = 0;
3107
3108 /*
3109 * We just skipped "namespace" or "xml:namespace"
3110 */
3111 SKIP_BLANKS;
3112
3113xmlGenericError(xmlGenericErrorContext,
3114 "xmlParseNamespace: reached loop 5\n");
3115 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3116 /*
3117 * We can have "ns" or "prefix" attributes
3118 * Old encoding as 'href' or 'AS' attributes is still supported
3119 */
3120 if ((RAW == 'n') && (NXT(1) == 's')) {
3121 garbage = 0;
3122 SKIP(2);
3123 SKIP_BLANKS;
3124
3125 if (RAW != '=') continue;
3126 NEXT;
3127 SKIP_BLANKS;
3128
3129 href = xmlParseQuotedString(ctxt);
3130 SKIP_BLANKS;
3131 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3132 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3133 garbage = 0;
3134 SKIP(4);
3135 SKIP_BLANKS;
3136
3137 if (RAW != '=') continue;
3138 NEXT;
3139 SKIP_BLANKS;
3140
3141 href = xmlParseQuotedString(ctxt);
3142 SKIP_BLANKS;
3143 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3144 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3145 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3146 garbage = 0;
3147 SKIP(6);
3148 SKIP_BLANKS;
3149
3150 if (RAW != '=') continue;
3151 NEXT;
3152 SKIP_BLANKS;
3153
3154 prefix = xmlParseQuotedString(ctxt);
3155 SKIP_BLANKS;
3156 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3157 garbage = 0;
3158 SKIP(2);
3159 SKIP_BLANKS;
3160
3161 if (RAW != '=') continue;
3162 NEXT;
3163 SKIP_BLANKS;
3164
3165 prefix = xmlParseQuotedString(ctxt);
3166 SKIP_BLANKS;
3167 } else if ((RAW == '?') && (NXT(1) == '>')) {
3168 garbage = 0;
3169 NEXT;
3170 } else {
3171 /*
3172 * Found garbage when parsing the namespace
3173 */
3174 if (!garbage) {
3175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3176 ctxt->sax->error(ctxt->userData,
3177 "xmlParseNamespace found garbage\n");
3178 }
3179 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3180 ctxt->wellFormed = 0;
3181 ctxt->disableSAX = 1;
3182 NEXT;
3183 }
3184 }
3185
3186 MOVETO_ENDTAG(CUR_PTR);
3187 NEXT;
3188
3189 /*
3190 * Register the DTD.
3191 if (href != NULL)
3192 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3193 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3194 */
3195
3196 if (prefix != NULL) xmlFree(prefix);
3197 if (href != NULL) xmlFree(href);
3198#endif
3199}
3200
3201/**
3202 * xmlScanName:
3203 * @ctxt: an XML parser context
3204 *
3205 * Trickery: parse an XML name but without consuming the input flow
3206 * Needed for rollback cases. Used only when parsing entities references.
3207 *
3208 * TODO: seems deprecated now, only used in the default part of
3209 * xmlParserHandleReference
3210 *
3211 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3212 * CombiningChar | Extender
3213 *
3214 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3215 *
3216 * [6] Names ::= Name (S Name)*
3217 *
3218 * Returns the Name parsed or NULL
3219 */
3220
3221xmlChar *
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003222xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003223 static int deprecated = 0;
3224 if (!deprecated) {
3225 xmlGenericError(xmlGenericErrorContext,
3226 "xmlScanName() deprecated function reached\n");
3227 deprecated = 1;
3228 }
3229 return(NULL);
3230
3231#if 0
3232 xmlChar buf[XML_MAX_NAMELEN];
3233 int len = 0;
3234
3235 GROW;
3236 if (!IS_LETTER(RAW) && (RAW != '_') &&
3237 (RAW != ':')) {
3238 return(NULL);
3239 }
3240
3241
3242 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3243 (NXT(len) == '.') || (NXT(len) == '-') ||
3244 (NXT(len) == '_') || (NXT(len) == ':') ||
3245 (IS_COMBINING(NXT(len))) ||
3246 (IS_EXTENDER(NXT(len)))) {
3247 GROW;
3248 buf[len] = NXT(len);
3249 len++;
3250 if (len >= XML_MAX_NAMELEN) {
3251 xmlGenericError(xmlGenericErrorContext,
3252 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3253 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3254 (IS_DIGIT(NXT(len))) ||
3255 (NXT(len) == '.') || (NXT(len) == '-') ||
3256 (NXT(len) == '_') || (NXT(len) == ':') ||
3257 (IS_COMBINING(NXT(len))) ||
3258 (IS_EXTENDER(NXT(len))))
3259 len++;
3260 break;
3261 }
3262 }
3263 return(xmlStrndup(buf, len));
3264#endif
3265}
3266
3267/**
3268 * xmlParserHandleReference:
3269 * @ctxt: the parser context
3270 *
3271 * TODO: Remove, now deprecated ... the test is done directly in the
3272 * content parsing
3273 * routines.
3274 *
3275 * [67] Reference ::= EntityRef | CharRef
3276 *
3277 * [68] EntityRef ::= '&' Name ';'
3278 *
3279 * [ WFC: Entity Declared ]
3280 * the Name given in the entity reference must match that in an entity
3281 * declaration, except that well-formed documents need not declare any
3282 * of the following entities: amp, lt, gt, apos, quot.
3283 *
3284 * [ WFC: Parsed Entity ]
3285 * An entity reference must not contain the name of an unparsed entity
3286 *
3287 * [66] CharRef ::= '&#' [0-9]+ ';' |
3288 * '&#x' [0-9a-fA-F]+ ';'
3289 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003290 * A PEReference may have been detected in the current input stream
Owen Taylor3473f882001-02-23 17:55:21 +00003291 * the handling is done accordingly to
3292 * http://www.w3.org/TR/REC-xml#entproc
3293 */
3294void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003295xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003296 static int deprecated = 0;
3297 if (!deprecated) {
3298 xmlGenericError(xmlGenericErrorContext,
3299 "xmlParserHandleReference() deprecated function reached\n");
3300 deprecated = 1;
3301 }
3302
3303#if 0
3304 xmlParserInputPtr input;
3305 xmlChar *name;
3306 xmlEntityPtr ent = NULL;
3307
3308 if (ctxt->token != 0) {
3309 return;
3310 }
3311 if (RAW != '&') return;
3312 GROW;
3313 if ((RAW == '&') && (NXT(1) == '#')) {
3314 switch(ctxt->instate) {
3315 case XML_PARSER_ENTITY_DECL:
3316 case XML_PARSER_PI:
3317 case XML_PARSER_CDATA_SECTION:
3318 case XML_PARSER_COMMENT:
3319 case XML_PARSER_SYSTEM_LITERAL:
3320 /* we just ignore it there */
3321 return;
3322 case XML_PARSER_START_TAG:
3323 return;
3324 case XML_PARSER_END_TAG:
3325 return;
3326 case XML_PARSER_EOF:
3327 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3329 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3330 ctxt->wellFormed = 0;
3331 ctxt->disableSAX = 1;
3332 return;
3333 case XML_PARSER_PROLOG:
3334 case XML_PARSER_START:
3335 case XML_PARSER_MISC:
3336 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3339 ctxt->wellFormed = 0;
3340 ctxt->disableSAX = 1;
3341 return;
3342 case XML_PARSER_EPILOG:
3343 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3345 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 return;
3349 case XML_PARSER_DTD:
3350 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003353 "CharRef are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003354 ctxt->wellFormed = 0;
3355 ctxt->disableSAX = 1;
3356 return;
3357 case XML_PARSER_ENTITY_VALUE:
3358 /*
3359 * NOTE: in the case of entity values, we don't do the
3360 * substitution here since we need the literal
3361 * entity value to be able to save the internal
3362 * subset of the document.
3363 * This will be handled by xmlStringDecodeEntities
3364 */
3365 return;
3366 case XML_PARSER_CONTENT:
3367 return;
3368 case XML_PARSER_ATTRIBUTE_VALUE:
3369 /* ctxt->token = xmlParseCharRef(ctxt); */
3370 return;
3371 case XML_PARSER_IGNORE:
3372 return;
3373 }
3374 return;
3375 }
3376
3377 switch(ctxt->instate) {
3378 case XML_PARSER_CDATA_SECTION:
3379 return;
3380 case XML_PARSER_PI:
3381 case XML_PARSER_COMMENT:
3382 case XML_PARSER_SYSTEM_LITERAL:
3383 case XML_PARSER_CONTENT:
3384 return;
3385 case XML_PARSER_START_TAG:
3386 return;
3387 case XML_PARSER_END_TAG:
3388 return;
3389 case XML_PARSER_EOF:
3390 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3392 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3393 ctxt->wellFormed = 0;
3394 ctxt->disableSAX = 1;
3395 return;
3396 case XML_PARSER_PROLOG:
3397 case XML_PARSER_START:
3398 case XML_PARSER_MISC:
3399 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3401 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3402 ctxt->wellFormed = 0;
3403 ctxt->disableSAX = 1;
3404 return;
3405 case XML_PARSER_EPILOG:
3406 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3409 ctxt->wellFormed = 0;
3410 ctxt->disableSAX = 1;
3411 return;
3412 case XML_PARSER_ENTITY_VALUE:
3413 /*
3414 * NOTE: in the case of entity values, we don't do the
3415 * substitution here since we need the literal
3416 * entity value to be able to save the internal
3417 * subset of the document.
3418 * This will be handled by xmlStringDecodeEntities
3419 */
3420 return;
3421 case XML_PARSER_ATTRIBUTE_VALUE:
3422 /*
3423 * NOTE: in the case of attributes values, we don't do the
3424 * substitution here unless we are in a mode where
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003425 * the parser is explicitly asked to substitute
Owen Taylor3473f882001-02-23 17:55:21 +00003426 * entities. The SAX callback is called with values
3427 * without entity substitution.
3428 * This will then be handled by xmlStringDecodeEntities
3429 */
3430 return;
3431 case XML_PARSER_ENTITY_DECL:
3432 /*
3433 * we just ignore it there
3434 * the substitution will be done once the entity is referenced
3435 */
3436 return;
3437 case XML_PARSER_DTD:
3438 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3440 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003441 "Entity references are forbidden in DTDs!\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003442 ctxt->wellFormed = 0;
3443 ctxt->disableSAX = 1;
3444 return;
3445 case XML_PARSER_IGNORE:
3446 return;
3447 }
3448
3449/* TODO: this seems not reached anymore .... Verify ... */
3450xmlGenericError(xmlGenericErrorContext,
3451 "Reached deprecated section in xmlParserHandleReference()\n");
3452xmlGenericError(xmlGenericErrorContext,
Daniel Veillardc5d64342001-06-24 12:13:24 +00003453 "Please forward the document to daniel@veillard.com\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003454xmlGenericError(xmlGenericErrorContext,
3455 "indicating the version: %s, thanks !\n", xmlParserVersion);
3456 NEXT;
3457 name = xmlScanName(ctxt);
3458 if (name == NULL) {
3459 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3461 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3462 ctxt->wellFormed = 0;
3463 ctxt->disableSAX = 1;
3464 ctxt->token = '&';
3465 return;
3466 }
3467 if (NXT(xmlStrlen(name)) != ';') {
3468 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3470 ctxt->sax->error(ctxt->userData,
3471 "Entity reference: ';' expected\n");
3472 ctxt->wellFormed = 0;
3473 ctxt->disableSAX = 1;
3474 ctxt->token = '&';
3475 xmlFree(name);
3476 return;
3477 }
3478 SKIP(xmlStrlen(name) + 1);
3479 if (ctxt->sax != NULL) {
3480 if (ctxt->sax->getEntity != NULL)
3481 ent = ctxt->sax->getEntity(ctxt->userData, name);
3482 }
3483
3484 /*
3485 * [ WFC: Entity Declared ]
3486 * the Name given in the entity reference must match that in an entity
3487 * declaration, except that well-formed documents need not declare any
3488 * of the following entities: amp, lt, gt, apos, quot.
3489 */
3490 if (ent == NULL)
3491 ent = xmlGetPredefinedEntity(name);
3492 if (ent == NULL) {
3493 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496 "Entity reference: entity %s not declared\n",
3497 name);
3498 ctxt->wellFormed = 0;
3499 ctxt->disableSAX = 1;
3500 xmlFree(name);
3501 return;
3502 }
3503
3504 /*
3505 * [ WFC: Parsed Entity ]
3506 * An entity reference must not contain the name of an unparsed entity
3507 */
3508 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3509 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3511 ctxt->sax->error(ctxt->userData,
3512 "Entity reference to unparsed entity %s\n", name);
3513 ctxt->wellFormed = 0;
3514 ctxt->disableSAX = 1;
3515 }
3516
3517 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3518 ctxt->token = ent->content[0];
3519 xmlFree(name);
3520 return;
3521 }
3522 input = xmlNewEntityInputStream(ctxt, ent);
3523 xmlPushInput(ctxt, input);
3524 xmlFree(name);
3525#endif
3526 return;
3527}
3528
3529/**
3530 * xmlHandleEntity:
3531 * @ctxt: an XML parser context
3532 * @entity: an XML entity pointer.
3533 *
3534 * Default handling of defined entities, when should we define a new input
3535 * stream ? When do we just handle that as a set of chars ?
3536 *
3537 * OBSOLETE: to be removed at some point.
3538 */
3539
3540void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003541xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003542 static int deprecated = 0;
3543 if (!deprecated) {
3544 xmlGenericError(xmlGenericErrorContext,
3545 "xmlHandleEntity() deprecated function reached\n");
3546 deprecated = 1;
3547 }
3548
3549#if 0
3550 int len;
3551 xmlParserInputPtr input;
3552
3553 if (entity->content == NULL) {
3554 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3556 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3557 entity->name);
3558 ctxt->wellFormed = 0;
3559 ctxt->disableSAX = 1;
3560 return;
3561 }
3562 len = xmlStrlen(entity->content);
3563 if (len <= 2) goto handle_as_char;
3564
3565 /*
3566 * Redefine its content as an input stream.
3567 */
3568 input = xmlNewEntityInputStream(ctxt, entity);
3569 xmlPushInput(ctxt, input);
3570 return;
3571
3572handle_as_char:
3573 /*
3574 * Just handle the content as a set of chars.
3575 */
3576 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3577 (ctxt->sax->characters != NULL))
3578 ctxt->sax->characters(ctxt->userData, entity->content, len);
3579#endif
3580}
3581
3582/**
3583 * xmlNewGlobalNs:
3584 * @doc: the document carrying the namespace
3585 * @href: the URI associated
3586 * @prefix: the prefix for the namespace
3587 *
3588 * Creation of a Namespace, the old way using PI and without scoping
3589 * DEPRECATED !!!
3590 * It now create a namespace on the root element of the document if found.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003591 * Returns NULL this functionality had been removed
Owen Taylor3473f882001-02-23 17:55:21 +00003592 */
3593xmlNsPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003594xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3595 const xmlChar *prefix ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 static int deprecated = 0;
3597 if (!deprecated) {
3598 xmlGenericError(xmlGenericErrorContext,
3599 "xmlNewGlobalNs() deprecated function reached\n");
3600 deprecated = 1;
3601 }
3602 return(NULL);
3603#if 0
3604 xmlNodePtr root;
3605
3606 xmlNsPtr cur;
3607
3608 root = xmlDocGetRootElement(doc);
3609 if (root != NULL)
3610 return(xmlNewNs(root, href, prefix));
3611
3612 /*
3613 * if there is no root element yet, create an old Namespace type
3614 * and it will be moved to the root at save time.
3615 */
3616 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3617 if (cur == NULL) {
3618 xmlGenericError(xmlGenericErrorContext,
3619 "xmlNewGlobalNs : malloc failed\n");
3620 return(NULL);
3621 }
3622 memset(cur, 0, sizeof(xmlNs));
3623 cur->type = XML_GLOBAL_NAMESPACE;
3624
3625 if (href != NULL)
3626 cur->href = xmlStrdup(href);
3627 if (prefix != NULL)
3628 cur->prefix = xmlStrdup(prefix);
3629
3630 /*
3631 * Add it at the end to preserve parsing order ...
3632 */
3633 if (doc != NULL) {
3634 if (doc->oldNs == NULL) {
3635 doc->oldNs = cur;
3636 } else {
3637 xmlNsPtr prev = doc->oldNs;
3638
3639 while (prev->next != NULL) prev = prev->next;
3640 prev->next = cur;
3641 }
3642 }
3643
3644 return(NULL);
3645#endif
3646}
3647
3648/**
3649 * xmlUpgradeOldNs:
3650 * @doc: a document pointer
3651 *
3652 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3653 * DEPRECATED
3654 */
3655void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +00003656xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +00003657 static int deprecated = 0;
3658 if (!deprecated) {
3659 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003660 "xmlUpgradeOldNs() deprecated function reached\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003661 deprecated = 1;
3662 }
3663#if 0
3664 xmlNsPtr cur;
3665
3666 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3667 if (doc->children == NULL) {
3668#ifdef DEBUG_TREE
3669 xmlGenericError(xmlGenericErrorContext,
3670 "xmlUpgradeOldNs: failed no root !\n");
3671#endif
3672 return;
3673 }
3674
3675 cur = doc->oldNs;
3676 while (cur->next != NULL) {
3677 cur->type = XML_LOCAL_NAMESPACE;
3678 cur = cur->next;
3679 }
3680 cur->type = XML_LOCAL_NAMESPACE;
3681 cur->next = doc->children->nsDef;
3682 doc->children->nsDef = doc->oldNs;
3683 doc->oldNs = NULL;
3684#endif
3685}
3686