blob: f99ed4e43f97a7e647f70034df113924fd2d1661 [file] [log] [blame]
/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
Daniel Veillardaaf58b92000-10-06 14:07:26 +000042#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000044#include <libxml/entities.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000045#include <libxml/xmlerror.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000046#include <libxml/encoding.h>
47#include <libxml/valid.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000048#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000050
51
52/************************************************************************
53 * *
54 * Version and Features handling *
55 * *
56 ************************************************************************/
57const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000071 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +000072 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000077 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +000078 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * xmlFeaturesList:
 *
 * Names of the parser features, in the order in which
 * xmlGetFeaturesList() hands them out.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  on input the capacity of @result, on output the number of
 *        names actually stored (input/output)
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.  The
 * strings stored are the entries of xmlFeaturesList themselves and
 * must not be deallocated.
 *
 * Returns the total number of features.  That count is also what is
 * returned, without copying anything, when @len or @result is NULL.
 * Returns -1 if *@len is negative or is 1000 or more.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total = (int) (sizeof(xmlFeaturesList) /
                             sizeof(xmlFeaturesList[0]));
    int idx;

    /* Query mode: nothing to fill, just report how many names exist. */
    if ((len == NULL) || (result == NULL))
        return(total);

    if ((*len < 0) || (*len >= 1000))
        return(-1);

    /* Never hand out more names than the table holds. */
    if (*len > total)
        *len = total;

    idx = 0;
    while (idx < *len) {
        result[idx] = xmlFeaturesList[idx];
        idx++;
    }
    return(total);
}
155
156/*
157 * xmlGetFeature:
158 * @ctxt: an XML/HTML parser context
159 * @name: the feature name
160 * @result: location to store the result
161 *
162 * Read the current value of one feature of this parser instance
163 *
164 * Returns -1 in case or error, 0 otherwise
165 */
166int
167xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
168 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
169 return(-1);
170
171 if (!strcmp(name, "validate")) {
172 *((int *) result) = ctxt->validate;
173 } else if (!strcmp(name, "keep blanks")) {
174 *((int *) result) = ctxt->keepBlanks;
175 } else if (!strcmp(name, "disable SAX")) {
176 *((int *) result) = ctxt->disableSAX;
177 } else if (!strcmp(name, "fetch external entities")) {
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000178 *((int *) result) = ctxt->loadsubset;
Daniel Veillardb1059e22000-09-16 14:02:43 +0000179 } else if (!strcmp(name, "substitute entities")) {
180 *((int *) result) = ctxt->replaceEntities;
181 } else if (!strcmp(name, "gather line info")) {
182 *((int *) result) = ctxt->record_info;
183 } else if (!strcmp(name, "user data")) {
184 *((void **)result) = ctxt->userData;
185 } else if (!strcmp(name, "is html")) {
186 *((int *) result) = ctxt->html;
187 } else if (!strcmp(name, "is standalone")) {
188 *((int *) result) = ctxt->standalone;
189 } else if (!strcmp(name, "document")) {
190 *((xmlDocPtr *) result) = ctxt->myDoc;
191 } else if (!strcmp(name, "is well formed")) {
192 *((int *) result) = ctxt->wellFormed;
193 } else if (!strcmp(name, "is valid")) {
194 *((int *) result) = ctxt->valid;
195 } else if (!strcmp(name, "SAX block")) {
196 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
197 } else if (!strcmp(name, "SAX function internalSubset")) {
198 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
199 } else if (!strcmp(name, "SAX function isStandalone")) {
200 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
201 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
202 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
203 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
204 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
205 } else if (!strcmp(name, "SAX function resolveEntity")) {
206 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
207 } else if (!strcmp(name, "SAX function getEntity")) {
208 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
209 } else if (!strcmp(name, "SAX function entityDecl")) {
210 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
211 } else if (!strcmp(name, "SAX function notationDecl")) {
212 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
213 } else if (!strcmp(name, "SAX function attributeDecl")) {
214 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
215 } else if (!strcmp(name, "SAX function elementDecl")) {
216 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
217 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
218 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
219 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
220 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
221 } else if (!strcmp(name, "SAX function startDocument")) {
222 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
223 } else if (!strcmp(name, "SAX function endDocument")) {
224 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
225 } else if (!strcmp(name, "SAX function startElement")) {
226 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
227 } else if (!strcmp(name, "SAX function endElement")) {
228 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
229 } else if (!strcmp(name, "SAX function reference")) {
230 *((referenceSAXFunc *) result) = ctxt->sax->reference;
231 } else if (!strcmp(name, "SAX function characters")) {
232 *((charactersSAXFunc *) result) = ctxt->sax->characters;
233 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
234 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
235 } else if (!strcmp(name, "SAX function processingInstruction")) {
236 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
237 } else if (!strcmp(name, "SAX function comment")) {
238 *((commentSAXFunc *) result) = ctxt->sax->comment;
239 } else if (!strcmp(name, "SAX function warning")) {
240 *((warningSAXFunc *) result) = ctxt->sax->warning;
241 } else if (!strcmp(name, "SAX function error")) {
242 *((errorSAXFunc *) result) = ctxt->sax->error;
243 } else if (!strcmp(name, "SAX function fatalError")) {
244 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
245 } else if (!strcmp(name, "SAX function getParameterEntity")) {
246 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
247 } else if (!strcmp(name, "SAX function cdataBlock")) {
248 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
249 } else if (!strcmp(name, "SAX function externalSubset")) {
250 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
251 } else {
252 return(-1);
253 }
254 return(0);
255}
256
257/*
258 * xmlSetFeature:
259 * @ctxt: an XML/HTML parser context
260 * @name: the feature name
261 * @value: pointer to the location of the new value
262 *
263 * Change the current value of one feature of this parser instance
264 *
265 * Returns -1 in case or error, 0 otherwise
266 */
267int
268xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
269 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
270 return(-1);
271
272 if (!strcmp(name, "validate")) {
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000273 int newvalidate = *((int *) value);
274 if ((!ctxt->validate) && (newvalidate != 0)) {
Daniel Veillardb1059e22000-09-16 14:02:43 +0000275 if (ctxt->vctxt.warning == NULL)
276 ctxt->vctxt.warning = xmlParserValidityWarning;
277 if (ctxt->vctxt.error == NULL)
278 ctxt->vctxt.error = xmlParserValidityError;
279 /* Allocate the Node stack */
280 ctxt->vctxt.nodeTab = (xmlNodePtr *)
281 xmlMalloc(4 * sizeof(xmlNodePtr));
282 if (ctxt->vctxt.nodeTab == NULL) {
283 ctxt->vctxt.nodeMax = 0;
284 ctxt->validate = 0;
285 return(-1);
286 }
287 ctxt->vctxt.nodeNr = 0;
288 ctxt->vctxt.nodeMax = 4;
289 ctxt->vctxt.node = NULL;
290 }
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000291 ctxt->validate = newvalidate;
292 } else if (!strcmp(name, "keep blanks")) {
293 ctxt->keepBlanks = *((int *) value);
294 } else if (!strcmp(name, "disable SAX")) {
295 ctxt->disableSAX = *((int *) value);
296 } else if (!strcmp(name, "fetch external entities")) {
297 ctxt->loadsubset = *((int *) value);
Daniel Veillardb1059e22000-09-16 14:02:43 +0000298 } else if (!strcmp(name, "substitute entities")) {
299 ctxt->replaceEntities = *((int *) value);
300 } else if (!strcmp(name, "gather line info")) {
301 ctxt->record_info = *((int *) value);
302 } else if (!strcmp(name, "user data")) {
303 ctxt->userData = *((void **)value);
304 } else if (!strcmp(name, "is html")) {
305 ctxt->html = *((int *) value);
306 } else if (!strcmp(name, "is standalone")) {
307 ctxt->standalone = *((int *) value);
308 } else if (!strcmp(name, "document")) {
309 ctxt->myDoc = *((xmlDocPtr *) value);
310 } else if (!strcmp(name, "is well formed")) {
311 ctxt->wellFormed = *((int *) value);
312 } else if (!strcmp(name, "is valid")) {
313 ctxt->valid = *((int *) value);
314 } else if (!strcmp(name, "SAX block")) {
315 ctxt->sax = *((xmlSAXHandlerPtr *) value);
316 } else if (!strcmp(name, "SAX function internalSubset")) {
317 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
318 } else if (!strcmp(name, "SAX function isStandalone")) {
319 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
320 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
321 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
322 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
323 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
324 } else if (!strcmp(name, "SAX function resolveEntity")) {
325 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
326 } else if (!strcmp(name, "SAX function getEntity")) {
327 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
328 } else if (!strcmp(name, "SAX function entityDecl")) {
329 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
330 } else if (!strcmp(name, "SAX function notationDecl")) {
331 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
332 } else if (!strcmp(name, "SAX function attributeDecl")) {
333 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
334 } else if (!strcmp(name, "SAX function elementDecl")) {
335 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
336 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
337 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
338 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
339 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
340 } else if (!strcmp(name, "SAX function startDocument")) {
341 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
342 } else if (!strcmp(name, "SAX function endDocument")) {
343 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
344 } else if (!strcmp(name, "SAX function startElement")) {
345 ctxt->sax->startElement = *((startElementSAXFunc *) value);
346 } else if (!strcmp(name, "SAX function endElement")) {
347 ctxt->sax->endElement = *((endElementSAXFunc *) value);
348 } else if (!strcmp(name, "SAX function reference")) {
349 ctxt->sax->reference = *((referenceSAXFunc *) value);
350 } else if (!strcmp(name, "SAX function characters")) {
351 ctxt->sax->characters = *((charactersSAXFunc *) value);
352 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
353 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
354 } else if (!strcmp(name, "SAX function processingInstruction")) {
355 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
356 } else if (!strcmp(name, "SAX function comment")) {
357 ctxt->sax->comment = *((commentSAXFunc *) value);
358 } else if (!strcmp(name, "SAX function warning")) {
359 ctxt->sax->warning = *((warningSAXFunc *) value);
360 } else if (!strcmp(name, "SAX function error")) {
361 ctxt->sax->error = *((errorSAXFunc *) value);
362 } else if (!strcmp(name, "SAX function fatalError")) {
363 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
364 } else if (!strcmp(name, "SAX function getParameterEntity")) {
365 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
366 } else if (!strcmp(name, "SAX function cdataBlock")) {
367 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
368 } else if (!strcmp(name, "SAX function externalSubset")) {
369 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
370 } else {
371 return(-1);
372 }
373 return(0);
374}
375
/************************************************************************
 *                                                                      *
 *               Some functions to avoid too large macros               *
 *                                                                      *
 ************************************************************************/
381
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space only TAB, LF and CR are legal. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* Surrogate block. */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through and fail the range test below. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
402
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
417
/*
 * xmlBaseArray:
 *
 * Lookup table for the code points 0x00 - 0xFF: entry c is 1 when c is
 * a BaseChar, 0 otherwise.  Only read by xmlIsBaseChar() in the part of
 * the file visible here.
 */
static const int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x100 are answered from xmlBaseArray; negative
 * values are rejected first so they can never index the table.  The
 * remaining ranges are split in three ascending groups so that a
 * character is only compared against the ranges of its own group.
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and 
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* Not a character at all; must not be used as a table index. */
    if (c < 0)
        return(0);

    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* Every range of this group ends before 0x0905. */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* Every range of this group ends before 0x10A0. */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* c >= 0x10A0 */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
656
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The ASCII digits are tested first; anything else below 0x0660 is
 * rejected without looking at the remaining ranges.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);

    /* No other digit range starts below 0x0660. */
    if (c < 0x0660)
        return(0);

    return(
        ((c >= 0x0660) && (c <= 0x0669)) ||
        ((c >= 0x06F0) && (c <= 0x06F9)) ||
        ((c >= 0x0966) && (c <= 0x096F)) ||
        ((c >= 0x09E6) && (c <= 0x09EF)) ||
        ((c >= 0x0A66) && (c <= 0x0A6F)) ||
        ((c >= 0x0AE6) && (c <= 0x0AEF)) ||
        ((c >= 0x0B66) && (c <= 0x0B6F)) ||
        ((c >= 0x0BE7) && (c <= 0x0BEF)) ||
        ((c >= 0x0C66) && (c <= 0x0C6F)) ||
        ((c >= 0x0CE6) && (c <= 0x0CEF)) ||
        ((c >= 0x0D66) && (c <= 0x0D6F)) ||
        ((c >= 0x0E50) && (c <= 0x0E59)) ||
        ((c >= 0x0ED0) && (c <= 0x0ED9)) ||
        ((c >= 0x0F20) && (c <= 0x0F29)));
}
686
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The ranges are split in four ascending groups; a character is only
 * compared against the ranges of the group its value falls into.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* Nothing below 0x0300 is a combining character. */
    if (c < 0x0300)
        return(0);

    /* Every range of this group ends before 0x0901. */
    if (c < 0x0901)
        return(
            ((c >= 0x0300) && (c <= 0x0345)) ||
            ((c >= 0x0360) && (c <= 0x0361)) ||
            ((c >= 0x0483) && (c <= 0x0486)) ||
            ((c >= 0x0591) && (c <= 0x05A1)) ||
            ((c >= 0x05A3) && (c <= 0x05B9)) ||
            ((c >= 0x05BB) && (c <= 0x05BD)) ||
            (c == 0x05BF) ||
            ((c >= 0x05C1) && (c <= 0x05C2)) ||
            (c == 0x05C4) ||
            ((c >= 0x064B) && (c <= 0x0652)) ||
            (c == 0x0670) ||
            ((c >= 0x06D6) && (c <= 0x06DC)) ||
            ((c >= 0x06DD) && (c <= 0x06DF)) ||
            ((c >= 0x06E0) && (c <= 0x06E4)) ||
            ((c >= 0x06E7) && (c <= 0x06E8)) ||
            ((c >= 0x06EA) && (c <= 0x06ED)));

    /* Every range of this group ends before 0x0A02. */
    if (c < 0x0A02)
        return(
            ((c >= 0x0901) && (c <= 0x0903)) ||
            (c == 0x093C) ||
            ((c >= 0x093E) && (c <= 0x094C)) ||
            (c == 0x094D) ||
            ((c >= 0x0951) && (c <= 0x0954)) ||
            ((c >= 0x0962) && (c <= 0x0963)) ||
            ((c >= 0x0981) && (c <= 0x0983)) ||
            (c == 0x09BC) ||
            (c == 0x09BE) ||
            (c == 0x09BF) ||
            ((c >= 0x09C0) && (c <= 0x09C4)) ||
            ((c >= 0x09C7) && (c <= 0x09C8)) ||
            ((c >= 0x09CB) && (c <= 0x09CD)) ||
            (c == 0x09D7) ||
            ((c >= 0x09E2) && (c <= 0x09E3)));

    /* Every range of this group ends before 0x0E31. */
    if (c < 0x0E31)
        return(
            (c == 0x0A02) ||
            (c == 0x0A3C) ||
            (c == 0x0A3E) ||
            (c == 0x0A3F) ||
            ((c >= 0x0A40) && (c <= 0x0A42)) ||
            ((c >= 0x0A47) && (c <= 0x0A48)) ||
            ((c >= 0x0A4B) && (c <= 0x0A4D)) ||
            ((c >= 0x0A70) && (c <= 0x0A71)) ||
            ((c >= 0x0A81) && (c <= 0x0A83)) ||
            (c == 0x0ABC) ||
            ((c >= 0x0ABE) && (c <= 0x0AC5)) ||
            ((c >= 0x0AC7) && (c <= 0x0AC9)) ||
            ((c >= 0x0ACB) && (c <= 0x0ACD)) ||
            ((c >= 0x0B01) && (c <= 0x0B03)) ||
            (c == 0x0B3C) ||
            ((c >= 0x0B3E) && (c <= 0x0B43)) ||
            ((c >= 0x0B47) && (c <= 0x0B48)) ||
            ((c >= 0x0B4B) && (c <= 0x0B4D)) ||
            ((c >= 0x0B56) && (c <= 0x0B57)) ||
            ((c >= 0x0B82) && (c <= 0x0B83)) ||
            ((c >= 0x0BBE) && (c <= 0x0BC2)) ||
            ((c >= 0x0BC6) && (c <= 0x0BC8)) ||
            ((c >= 0x0BCA) && (c <= 0x0BCD)) ||
            (c == 0x0BD7) ||
            ((c >= 0x0C01) && (c <= 0x0C03)) ||
            ((c >= 0x0C3E) && (c <= 0x0C44)) ||
            ((c >= 0x0C46) && (c <= 0x0C48)) ||
            ((c >= 0x0C4A) && (c <= 0x0C4D)) ||
            ((c >= 0x0C55) && (c <= 0x0C56)) ||
            ((c >= 0x0C82) && (c <= 0x0C83)) ||
            ((c >= 0x0CBE) && (c <= 0x0CC4)) ||
            ((c >= 0x0CC6) && (c <= 0x0CC8)) ||
            ((c >= 0x0CCA) && (c <= 0x0CCD)) ||
            ((c >= 0x0CD5) && (c <= 0x0CD6)) ||
            ((c >= 0x0D02) && (c <= 0x0D03)) ||
            ((c >= 0x0D3E) && (c <= 0x0D43)) ||
            ((c >= 0x0D46) && (c <= 0x0D48)) ||
            ((c >= 0x0D4A) && (c <= 0x0D4D)) ||
            (c == 0x0D57));

    /* c >= 0x0E31 */
    return(
        (c == 0x0E31) ||
        ((c >= 0x0E34) && (c <= 0x0E3A)) ||
        ((c >= 0x0E47) && (c <= 0x0E4E)) ||
        (c == 0x0EB1) ||
        ((c >= 0x0EB4) && (c <= 0x0EB9)) ||
        ((c >= 0x0EBB) && (c <= 0x0EBC)) ||
        ((c >= 0x0EC8) && (c <= 0x0ECD)) ||
        ((c >= 0x0F18) && (c <= 0x0F19)) ||
        (c == 0x0F35) ||
        (c == 0x0F37) ||
        (c == 0x0F39) ||
        (c == 0x0F3E) ||
        (c == 0x0F3F) ||
        ((c >= 0x0F71) && (c <= 0x0F84)) ||
        ((c >= 0x0F86) && (c <= 0x0F8B)) ||
        ((c >= 0x0F90) && (c <= 0x0F95)) ||
        (c == 0x0F97) ||
        ((c >= 0x0F99) && (c <= 0x0FAD)) ||
        ((c >= 0x0FB1) && (c <= 0x0FB7)) ||
        (c == 0x0FB9) ||
        ((c >= 0x20D0) && (c <= 0x20DC)) ||
        (c == 0x20E1) ||
        ((c >= 0x302A) && (c <= 0x302F)) ||
        (c == 0x3099) ||
        (c == 0x309A));
}
799
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
    /* [#x3031-#x3035] */
    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
    case 0x3035:
    /* [#x309D-#x309E] */
    case 0x309D: case 0x309E:
    /* [#x30FC-#x30FE], every member of the range must be listed */
    case 0x30FC: case 0x30FD: case 0x30FE:
        return 1;
    default:
        return 0;
    }
}
824
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three ranges of the production are accepted; code points
 * from the compatibility area (#xF900 and above) are not part of it.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast rejection: nothing below 0x0100 is ideographic */
    if (c < 0x0100)
        return(0);
    return(((c >= 0x4e00) && (c <= 0x9fa5)) ||
           ((c >= 0x3021) && (c <= 0x3029)) ||
           (c == 0x3007));
}
842
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
856
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    /* white space and the punctuation set of the production */
    switch (c) {
    case 0x20: case 0x0D: case 0x0A:
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
879
880/************************************************************************
881 * *
882 * Input handling functions for progressive parsing *
883 * *
884 ************************************************************************/
885
886/* #define DEBUG_INPUT */
887/* #define DEBUG_STACK */
888/* #define DEBUG_PUSH */
889
890
891/* we need to keep enough input to show errors in context */
892#define LINE_LEN 80
893
894#ifdef DEBUG_INPUT
895#define CHECK_BUFFER(in) check_buffer(in)
896
897void check_buffer(xmlParserInputPtr in) {
898 if (in->base != in->buf->buffer->content) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000899 xmlGenericError(xmlGenericErrorContext,
900 "xmlParserInput: base mismatch problem\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000901 }
902 if (in->cur < in->base) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000903 xmlGenericError(xmlGenericErrorContext,
904 "xmlParserInput: cur < base problem\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000905 }
906 if (in->cur > in->base + in->buf->buffer->use) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000907 xmlGenericError(xmlGenericErrorContext,
908 "xmlParserInput: cur > base + use problem\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000909 }
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000910 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
Daniel Veillardb1059e22000-09-16 14:02:43 +0000911 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
912 in->buf->buffer->use, in->buf->buffer->size);
913}
914
915#else
916#define CHECK_BUFFER(in)
917#endif
918
919
920/**
921 * xmlParserInputRead:
922 * @in: an XML parser input
923 * @len: an indicative size for the lookahead
924 *
925 * This function refresh the input for the parser. It doesn't try to
926 * preserve pointers to the input buffer, and discard already read data
927 *
928 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
929 * end of this entity
930 */
931int
932xmlParserInputRead(xmlParserInputPtr in, int len) {
933 int ret;
934 int used;
935 int index;
936
937#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000938 xmlGenericError(xmlGenericErrorContext, "Read\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000939#endif
940 if (in->buf == NULL) return(-1);
941 if (in->base == NULL) return(-1);
942 if (in->cur == NULL) return(-1);
943 if (in->buf->buffer == NULL) return(-1);
944 if (in->buf->readcallback == NULL) return(-1);
945
946 CHECK_BUFFER(in);
947
948 used = in->cur - in->buf->buffer->content;
949 ret = xmlBufferShrink(in->buf->buffer, used);
950 if (ret > 0) {
951 in->cur -= ret;
952 in->consumed += ret;
953 }
954 ret = xmlParserInputBufferRead(in->buf, len);
955 if (in->base != in->buf->buffer->content) {
956 /*
957 * the buffer has been realloced
958 */
959 index = in->cur - in->base;
960 in->base = in->buf->buffer->content;
961 in->cur = &in->buf->buffer->content[index];
962 }
963
964 CHECK_BUFFER(in);
965
966 return(ret);
967}
968
969/**
970 * xmlParserInputGrow:
971 * @in: an XML parser input
972 * @len: an indicative size for the lookahead
973 *
974 * This function increase the input for the parser. It tries to
975 * preserve pointers to the input buffer, and keep already read data
976 *
977 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
978 * end of this entity
979 */
980int
981xmlParserInputGrow(xmlParserInputPtr in, int len) {
982 int ret;
983 int index;
984
985#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000986 xmlGenericError(xmlGenericErrorContext, "Grow\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000987#endif
988 if (in->buf == NULL) return(-1);
989 if (in->base == NULL) return(-1);
990 if (in->cur == NULL) return(-1);
991 if (in->buf->buffer == NULL) return(-1);
992
993 CHECK_BUFFER(in);
994
995 index = in->cur - in->base;
996 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
997
998 CHECK_BUFFER(in);
999
1000 return(0);
1001 }
1002 if (in->buf->readcallback != NULL)
1003 ret = xmlParserInputBufferGrow(in->buf, len);
1004 else
1005 return(0);
1006
1007 /*
1008 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
1009 * block, but we use it really as an integer to do some
1010 * pointer arithmetic. Insure will raise it as a bug but in
1011 * that specific case, that's not !
1012 */
1013 if (in->base != in->buf->buffer->content) {
1014 /*
1015 * the buffer has been realloced
1016 */
1017 index = in->cur - in->base;
1018 in->base = in->buf->buffer->content;
1019 in->cur = &in->buf->buffer->content[index];
1020 }
1021
1022 CHECK_BUFFER(in);
1023
1024 return(ret);
1025}
1026
1027/**
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1030 *
1031 * This function removes used input for the parser.
1032 */
1033void
1034xmlParserInputShrink(xmlParserInputPtr in) {
1035 int used;
1036 int ret;
1037 int index;
1038
1039#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00001041#endif
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1046
1047 CHECK_BUFFER(in);
1048
1049 used = in->cur - in->buf->buffer->content;
Daniel Veillard1baf4122000-10-15 20:38:39 +00001050 /*
1051 * Do not shrink on large buffers whose only a tiny fraction
1052 * was consumned
1053 */
1054 if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1055 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00001056 if (used > INPUT_CHUNK) {
1057 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1058 if (ret > 0) {
1059 in->cur -= ret;
1060 in->consumed += ret;
1061 }
1062 }
1063
1064 CHECK_BUFFER(in);
1065
1066 if (in->buf->buffer->use > INPUT_CHUNK) {
1067 return;
1068 }
1069 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1070 if (in->base != in->buf->buffer->content) {
1071 /*
1072 * the buffer has been realloced
1073 */
1074 index = in->cur - in->base;
1075 in->base = in->buf->buffer->content;
1076 in->cur = &in->buf->buffer->content[index];
1077 }
1078
1079 CHECK_BUFFER(in);
1080}
1081
1082/************************************************************************
1083 * *
1084 * UTF8 character input and related functions *
1085 * *
1086 ************************************************************************/
1087
1088/**
1089 * xmlNextChar:
1090 * @ctxt: the XML parser context
1091 *
1092 * Skip to the next char input char.
1093 */
1094
1095void
1096xmlNextChar(xmlParserCtxtPtr ctxt) {
1097 if (ctxt->instate == XML_PARSER_EOF)
1098 return;
1099
1100 /*
1101 * 2.11 End-of-Line Handling
1102 * the literal two-character sequence "#xD#xA" or a standalone
1103 * literal #xD, an XML processor must pass to the application
1104 * the single character #xA.
1105 */
1106 if (ctxt->token != 0) ctxt->token = 0;
1107 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1108 if ((*ctxt->input->cur == 0) &&
1109 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1110 (ctxt->instate != XML_PARSER_COMMENT)) {
1111 /*
1112 * If we are at the end of the current entity and
1113 * the context allows it, we pop consumed entities
1114 * automatically.
1115 * the auto closing should be blocked in other cases
1116 */
1117 xmlPopInput(ctxt);
1118 } else {
1119 if (*(ctxt->input->cur) == '\n') {
1120 ctxt->input->line++; ctxt->input->col = 1;
1121 } else ctxt->input->col++;
1122 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1123 /*
1124 * We are supposed to handle UTF8, check it's valid
1125 * From rfc2044: encoding of the Unicode values on UTF-8:
1126 *
1127 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128 * 0000 0000-0000 007F 0xxxxxxx
1129 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131 *
1132 * Check for the 0x110000 limit too
1133 */
1134 const unsigned char *cur = ctxt->input->cur;
1135 unsigned char c;
1136
1137 c = *cur;
1138 if (c & 0x80) {
1139 if (cur[1] == 0)
1140 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1141 if ((cur[1] & 0xc0) != 0x80)
1142 goto encoding_error;
1143 if ((c & 0xe0) == 0xe0) {
1144 unsigned int val;
1145
1146 if (cur[2] == 0)
1147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1148 if ((cur[2] & 0xc0) != 0x80)
1149 goto encoding_error;
1150 if ((c & 0xf0) == 0xf0) {
1151 if (cur[3] == 0)
1152 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1153 if (((c & 0xf8) != 0xf0) ||
1154 ((cur[3] & 0xc0) != 0x80))
1155 goto encoding_error;
1156 /* 4-byte code */
1157 ctxt->input->cur += 4;
1158 val = (cur[0] & 0x7) << 18;
1159 val |= (cur[1] & 0x3f) << 12;
1160 val |= (cur[2] & 0x3f) << 6;
1161 val |= cur[3] & 0x3f;
1162 } else {
1163 /* 3-byte code */
1164 ctxt->input->cur += 3;
1165 val = (cur[0] & 0xf) << 12;
1166 val |= (cur[1] & 0x3f) << 6;
1167 val |= cur[2] & 0x3f;
1168 }
1169 if (((val > 0xd7ff) && (val < 0xe000)) ||
1170 ((val > 0xfffd) && (val < 0x10000)) ||
1171 (val >= 0x110000)) {
1172 if ((ctxt->sax != NULL) &&
1173 (ctxt->sax->error != NULL))
1174 ctxt->sax->error(ctxt->userData,
1175 "Char 0x%X out of allowed range\n", val);
1176 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1177 ctxt->wellFormed = 0;
1178 ctxt->disableSAX = 1;
1179 }
1180 } else
1181 /* 2-byte code */
1182 ctxt->input->cur += 2;
1183 } else
1184 /* 1-byte code */
1185 ctxt->input->cur++;
1186 } else {
1187 /*
1188 * Assume it's a fixed lenght encoding (1) with
1189 * a compatibke encoding for the ASCII set, since
1190 * XML constructs only use < 128 chars
1191 */
1192 ctxt->input->cur++;
1193 }
1194 ctxt->nbChars++;
1195 if (*ctxt->input->cur == 0)
1196 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1197 }
1198 } else {
1199 ctxt->input->cur++;
1200 ctxt->nbChars++;
1201 if (*ctxt->input->cur == 0)
1202 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1203 }
1204 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1205 xmlParserHandlePEReference(ctxt);
1206 if ((*ctxt->input->cur == 0) &&
1207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1208 xmlPopInput(ctxt);
1209 return;
1210encoding_error:
1211 /*
1212 * If we detect an UTF8 error that probably mean that the
1213 * input encoding didn't get properly advertized in the
1214 * declaration header. Report the error and switch the encoding
1215 * to ISO-Latin-1 (if you don't like this policy, just declare the
1216 * encoding !)
1217 */
1218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1219 ctxt->sax->error(ctxt->userData,
1220 "Input is not proper UTF-8, indicate encoding !\n");
1221 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1222 ctxt->input->cur[0], ctxt->input->cur[1],
1223 ctxt->input->cur[2], ctxt->input->cur[3]);
1224 }
1225 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1226
1227 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1228 ctxt->input->cur++;
1229 return;
1230}
1231
1232/**
1233 * xmlCurrentChar:
1234 * @ctxt: the XML parser context
1235 * @len: pointer to the length of the char read
1236 *
1237 * The current char value, if using UTF-8 this may actaully span multiple
1238 * bytes in the input buffer. Implement the end of line normalization:
1239 * 2.11 End-of-Line Handling
1240 * Wherever an external parsed entity or the literal entity value
1241 * of an internal parsed entity contains either the literal two-character
1242 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1243 * must pass to the application the single character #xA.
1244 * This behavior can conveniently be produced by normalizing all
1245 * line breaks to #xA on input, before parsing.)
1246 *
1247 * Returns the current char value and its lenght
1248 */
1249
1250int
1251xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1252 if (ctxt->instate == XML_PARSER_EOF)
1253 return(0);
1254
1255 if (ctxt->token != 0) {
1256 *len = 0;
1257 return(ctxt->token);
1258 }
1259 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1260 /*
1261 * We are supposed to handle UTF8, check it's valid
1262 * From rfc2044: encoding of the Unicode values on UTF-8:
1263 *
1264 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1265 * 0000 0000-0000 007F 0xxxxxxx
1266 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1267 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1268 *
1269 * Check for the 0x110000 limit too
1270 */
1271 const unsigned char *cur = ctxt->input->cur;
1272 unsigned char c;
1273 unsigned int val;
1274
1275 c = *cur;
1276 if (c & 0x80) {
1277 if (cur[1] == 0)
1278 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1279 if ((cur[1] & 0xc0) != 0x80)
1280 goto encoding_error;
1281 if ((c & 0xe0) == 0xe0) {
1282
1283 if (cur[2] == 0)
1284 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 if ((cur[2] & 0xc0) != 0x80)
1286 goto encoding_error;
1287 if ((c & 0xf0) == 0xf0) {
1288 if (cur[3] == 0)
1289 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1290 if (((c & 0xf8) != 0xf0) ||
1291 ((cur[3] & 0xc0) != 0x80))
1292 goto encoding_error;
1293 /* 4-byte code */
1294 *len = 4;
1295 val = (cur[0] & 0x7) << 18;
1296 val |= (cur[1] & 0x3f) << 12;
1297 val |= (cur[2] & 0x3f) << 6;
1298 val |= cur[3] & 0x3f;
1299 } else {
1300 /* 3-byte code */
1301 *len = 3;
1302 val = (cur[0] & 0xf) << 12;
1303 val |= (cur[1] & 0x3f) << 6;
1304 val |= cur[2] & 0x3f;
1305 }
1306 } else {
1307 /* 2-byte code */
1308 *len = 2;
1309 val = (cur[0] & 0x1f) << 6;
1310 val |= cur[1] & 0x3f;
1311 }
1312 if (!IS_CHAR(val)) {
1313 if ((ctxt->sax != NULL) &&
1314 (ctxt->sax->error != NULL))
1315 ctxt->sax->error(ctxt->userData,
1316 "Char 0x%X out of allowed range\n", val);
1317 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1318 ctxt->wellFormed = 0;
1319 ctxt->disableSAX = 1;
1320 }
1321 return(val);
1322 } else {
1323 /* 1-byte code */
1324 *len = 1;
1325 if (*ctxt->input->cur == 0xD) {
1326 if (ctxt->input->cur[1] == 0xA) {
1327 ctxt->nbChars++;
1328 ctxt->input->cur++;
1329 }
1330 return(0xA);
1331 }
1332 return((int) *ctxt->input->cur);
1333 }
1334 }
1335 /*
1336 * Assume it's a fixed lenght encoding (1) with
1337 * a compatibke encoding for the ASCII set, since
1338 * XML constructs only use < 128 chars
1339 */
1340 *len = 1;
1341 if (*ctxt->input->cur == 0xD) {
1342 if (ctxt->input->cur[1] == 0xA) {
1343 ctxt->nbChars++;
1344 ctxt->input->cur++;
1345 }
1346 return(0xA);
1347 }
1348 return((int) *ctxt->input->cur);
1349encoding_error:
1350 /*
1351 * If we detect an UTF8 error that probably mean that the
1352 * input encoding didn't get properly advertized in the
1353 * declaration header. Report the error and switch the encoding
1354 * to ISO-Latin-1 (if you don't like this policy, just declare the
1355 * encoding !)
1356 */
1357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1358 ctxt->sax->error(ctxt->userData,
1359 "Input is not proper UTF-8, indicate encoding !\n");
1360 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1361 ctxt->input->cur[0], ctxt->input->cur[1],
1362 ctxt->input->cur[2], ctxt->input->cur[3]);
1363 }
1364 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1365
1366 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1367 *len = 1;
1368 return((int) *ctxt->input->cur);
1369}
1370
1371/**
1372 * xmlStringCurrentChar:
1373 * @ctxt: the XML parser context
1374 * @cur: pointer to the beginning of the char
1375 * @len: pointer to the length of the char read
1376 *
1377 * The current char value, if using UTF-8 this may actaully span multiple
1378 * bytes in the input buffer.
1379 *
1380 * Returns the current char value and its lenght
1381 */
1382
1383int
1384xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1385 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1386 /*
1387 * We are supposed to handle UTF8, check it's valid
1388 * From rfc2044: encoding of the Unicode values on UTF-8:
1389 *
1390 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1391 * 0000 0000-0000 007F 0xxxxxxx
1392 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1393 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1394 *
1395 * Check for the 0x110000 limit too
1396 */
1397 unsigned char c;
1398 unsigned int val;
1399
1400 c = *cur;
1401 if (c & 0x80) {
1402 if ((cur[1] & 0xc0) != 0x80)
1403 goto encoding_error;
1404 if ((c & 0xe0) == 0xe0) {
1405
1406 if ((cur[2] & 0xc0) != 0x80)
1407 goto encoding_error;
1408 if ((c & 0xf0) == 0xf0) {
1409 if (((c & 0xf8) != 0xf0) ||
1410 ((cur[3] & 0xc0) != 0x80))
1411 goto encoding_error;
1412 /* 4-byte code */
1413 *len = 4;
1414 val = (cur[0] & 0x7) << 18;
1415 val |= (cur[1] & 0x3f) << 12;
1416 val |= (cur[2] & 0x3f) << 6;
1417 val |= cur[3] & 0x3f;
1418 } else {
1419 /* 3-byte code */
1420 *len = 3;
1421 val = (cur[0] & 0xf) << 12;
1422 val |= (cur[1] & 0x3f) << 6;
1423 val |= cur[2] & 0x3f;
1424 }
1425 } else {
1426 /* 2-byte code */
1427 *len = 2;
1428 val = (cur[0] & 0x1f) << 6;
1429 val |= cur[2] & 0x3f;
1430 }
1431 if (!IS_CHAR(val)) {
1432 if ((ctxt->sax != NULL) &&
1433 (ctxt->sax->error != NULL))
1434 ctxt->sax->error(ctxt->userData,
1435 "Char 0x%X out of allowed range\n", val);
1436 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1437 ctxt->wellFormed = 0;
1438 ctxt->disableSAX = 1;
1439 }
1440 return(val);
1441 } else {
1442 /* 1-byte code */
1443 *len = 1;
1444 return((int) *cur);
1445 }
1446 }
1447 /*
1448 * Assume it's a fixed lenght encoding (1) with
1449 * a compatibke encoding for the ASCII set, since
1450 * XML constructs only use < 128 chars
1451 */
1452 *len = 1;
1453 return((int) *cur);
1454encoding_error:
1455 /*
1456 * If we detect an UTF8 error that probably mean that the
1457 * input encoding didn't get properly advertized in the
1458 * declaration header. Report the error and switch the encoding
1459 * to ISO-Latin-1 (if you don't like this policy, just declare the
1460 * encoding !)
1461 */
1462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1463 ctxt->sax->error(ctxt->userData,
1464 "Input is not proper UTF-8, indicate encoding !\n");
1465 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1466 ctxt->input->cur[0], ctxt->input->cur[1],
1467 ctxt->input->cur[2], ctxt->input->cur[3]);
1468 }
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470
1471 *len = 1;
1472 return((int) *cur);
1473}
1474
1475/**
1476 * xmlCopyChar:
1477 * @len: pointer to the length of the char read (or zero)
1478 * @array: pointer to an arry of xmlChar
1479 * @val: the char value
1480 *
1481 * append the char value in the array
1482 *
1483 * Returns the number of xmlChar written
1484 */
1485
1486int
1487xmlCopyChar(int len, xmlChar *out, int val) {
1488 /*
1489 * We are supposed to handle UTF8, check it's valid
1490 * From rfc2044: encoding of the Unicode values on UTF-8:
1491 *
1492 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1493 * 0000 0000-0000 007F 0xxxxxxx
1494 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1495 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1496 */
1497 if (len == 0) {
1498 if (val < 0) len = 0;
1499 else if (val < 0x80) len = 1;
1500 else if (val < 0x800) len = 2;
1501 else if (val < 0x10000) len = 3;
1502 else if (val < 0x110000) len = 4;
1503 if (len == 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001504 xmlGenericError(xmlGenericErrorContext,
1505 "Internal error, xmlCopyChar 0x%X out of bound\n",
Daniel Veillardb1059e22000-09-16 14:02:43 +00001506 val);
1507 return(0);
1508 }
1509 }
1510 if (len > 1) {
1511 int bits;
1512
1513 if (val < 0x80) { *out++= val; bits= -6; }
1514 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1515 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1516 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1517
1518 for ( ; bits >= 0; bits-= 6)
1519 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1520
1521 return(len);
1522 }
1523 *out = (xmlChar) val;
1524 return(1);
1525}
1526
1527/************************************************************************
1528 * *
Daniel Veillard04698d92000-09-17 16:00:22 +00001529 * Commodity functions to switch encodings *
1530 * *
1531 ************************************************************************/
1532
1533/**
1534 * xmlSwitchEncoding:
1535 * @ctxt: the parser context
1536 * @enc: the encoding value (number)
1537 *
1538 * change the input functions when discovering the character encoding
1539 * of a given entity.
1540 *
1541 * Returns 0 in case of success, -1 otherwise
1542 */
1543int
1544xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1545{
1546 xmlCharEncodingHandlerPtr handler;
1547
1548 switch (enc) {
1549 case XML_CHAR_ENCODING_ERROR:
1550 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1552 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1553 ctxt->wellFormed = 0;
1554 ctxt->disableSAX = 1;
1555 break;
1556 case XML_CHAR_ENCODING_NONE:
1557 /* let's assume it's UTF-8 without the XML decl */
1558 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1559 return(0);
1560 case XML_CHAR_ENCODING_UTF8:
1561 /* default encoding, no conversion should be needed */
1562 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1563 return(0);
1564 default:
1565 break;
1566 }
1567 handler = xmlGetCharEncodingHandler(enc);
1568 if (handler == NULL) {
1569 /*
1570 * Default handlers.
1571 */
1572 switch (enc) {
1573 case XML_CHAR_ENCODING_ERROR:
1574 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1576 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1577 ctxt->wellFormed = 0;
1578 ctxt->disableSAX = 1;
1579 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1580 break;
1581 case XML_CHAR_ENCODING_NONE:
1582 /* let's assume it's UTF-8 without the XML decl */
1583 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1584 return(0);
1585 case XML_CHAR_ENCODING_UTF8:
1586 case XML_CHAR_ENCODING_ASCII:
1587 /* default encoding, no conversion should be needed */
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1589 return(0);
1590 case XML_CHAR_ENCODING_UTF16LE:
1591 break;
1592 case XML_CHAR_ENCODING_UTF16BE:
1593 break;
1594 case XML_CHAR_ENCODING_UCS4LE:
1595 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1597 ctxt->sax->error(ctxt->userData,
1598 "char encoding USC4 little endian not supported\n");
1599 break;
1600 case XML_CHAR_ENCODING_UCS4BE:
1601 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "char encoding USC4 big endian not supported\n");
1605 break;
1606 case XML_CHAR_ENCODING_EBCDIC:
1607 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1609 ctxt->sax->error(ctxt->userData,
1610 "char encoding EBCDIC not supported\n");
1611 break;
1612 case XML_CHAR_ENCODING_UCS4_2143:
1613 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615 ctxt->sax->error(ctxt->userData,
1616 "char encoding UCS4 2143 not supported\n");
1617 break;
1618 case XML_CHAR_ENCODING_UCS4_3412:
1619 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1621 ctxt->sax->error(ctxt->userData,
1622 "char encoding UCS4 3412 not supported\n");
1623 break;
1624 case XML_CHAR_ENCODING_UCS2:
1625 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1627 ctxt->sax->error(ctxt->userData,
1628 "char encoding UCS2 not supported\n");
1629 break;
1630 case XML_CHAR_ENCODING_8859_1:
1631 case XML_CHAR_ENCODING_8859_2:
1632 case XML_CHAR_ENCODING_8859_3:
1633 case XML_CHAR_ENCODING_8859_4:
1634 case XML_CHAR_ENCODING_8859_5:
1635 case XML_CHAR_ENCODING_8859_6:
1636 case XML_CHAR_ENCODING_8859_7:
1637 case XML_CHAR_ENCODING_8859_8:
1638 case XML_CHAR_ENCODING_8859_9:
1639 /*
1640 * We used to keep the internal content in the
1641 * document encoding however this turns being unmaintainable
1642 * So xmlGetCharEncodingHandler() will return non-null
1643 * values for this now.
1644 */
1645 if ((ctxt->inputNr == 1) &&
1646 (ctxt->encoding == NULL) &&
1647 (ctxt->input->encoding != NULL)) {
1648 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1649 }
1650 ctxt->charset = enc;
1651 return(0);
1652 case XML_CHAR_ENCODING_2022_JP:
1653 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "char encoding ISO-2022-JPnot supported\n");
1657 break;
1658 case XML_CHAR_ENCODING_SHIFT_JIS:
1659 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1661 ctxt->sax->error(ctxt->userData,
1662 "char encoding Shift_JIS not supported\n");
1663 break;
1664 case XML_CHAR_ENCODING_EUC_JP:
1665 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1667 ctxt->sax->error(ctxt->userData,
1668 "char encoding EUC-JPnot supported\n");
1669 break;
1670 }
1671 }
1672 if (handler == NULL)
1673 return(-1);
1674 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1675 return(xmlSwitchToEncoding(ctxt, handler));
1676}
1677
1678/**
1679 * xmlSwitchToEncoding:
1680 * @ctxt: the parser context
1681 * @handler: the encoding handler
1682 *
1683 * change the input functions when discovering the character encoding
1684 * of a given entity.
1685 *
1686 * Returns 0 in case of success, -1 otherwise
1687 */
1688int
1689xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1690{
1691 int nbchars;
1692
1693 if (handler != NULL) {
1694 if (ctxt->input != NULL) {
1695 if (ctxt->input->buf != NULL) {
1696 if (ctxt->input->buf->encoder != NULL) {
1697 if (ctxt->input->buf->encoder == handler)
1698 return(0);
1699 /*
1700 * Note: this is a bit dangerous, but that's what it
1701 * takes to use nearly compatible signature for different
1702 * encodings.
1703 */
1704 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1705 ctxt->input->buf->encoder = handler;
1706 return(0);
1707 }
1708 ctxt->input->buf->encoder = handler;
1709
1710 /*
1711 * Is there already some content down the pipe to convert ?
1712 */
1713 if ((ctxt->input->buf->buffer != NULL) &&
1714 (ctxt->input->buf->buffer->use > 0)) {
1715 int processed;
1716
1717 /*
1718 * Specific handling of the Byte Order Mark for
1719 * UTF-16
1720 */
1721 if ((handler->name != NULL) &&
1722 (!strcmp(handler->name, "UTF-16LE")) &&
1723 (ctxt->input->cur[0] == 0xFF) &&
1724 (ctxt->input->cur[1] == 0xFE)) {
1725 ctxt->input->cur += 2;
1726 }
1727 if ((handler->name != NULL) &&
1728 (!strcmp(handler->name, "UTF-16BE")) &&
1729 (ctxt->input->cur[0] == 0xFE) &&
1730 (ctxt->input->cur[1] == 0xFF)) {
1731 ctxt->input->cur += 2;
1732 }
1733
1734 /*
1735 * Shring the current input buffer.
1736 * Move it as the raw buffer and create a new input buffer
1737 */
1738 processed = ctxt->input->cur - ctxt->input->base;
1739 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1740 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1741 ctxt->input->buf->buffer = xmlBufferCreate();
1742
1743 if (ctxt->html) {
1744 /*
1745 * converst as much as possbile of the buffer
1746 */
1747 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1748 ctxt->input->buf->buffer,
1749 ctxt->input->buf->raw);
1750 } else {
1751 /*
1752 * convert just enough to get
1753 * '<?xml version="1.0" encoding="xxx"?>'
1754 * parsed with the autodetected encoding
1755 * into the parser reading buffer.
1756 */
1757 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1758 ctxt->input->buf->buffer,
1759 ctxt->input->buf->raw);
1760 }
1761 if (nbchars < 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001762 xmlGenericError(xmlGenericErrorContext,
1763 "xmlSwitchToEncoding: encoder error\n");
Daniel Veillard04698d92000-09-17 16:00:22 +00001764 return(-1);
1765 }
1766 ctxt->input->base =
1767 ctxt->input->cur = ctxt->input->buf->buffer->content;
1768
1769 }
1770 return(0);
1771 } else {
1772 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1773 /*
1774 * When parsing a static memory array one must know the
1775 * size to be able to convert the buffer.
1776 */
1777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1778 ctxt->sax->error(ctxt->userData,
1779 "xmlSwitchEncoding : no input\n");
1780 return(-1);
1781 } else {
1782 int processed;
1783
1784 /*
1785 * Shring the current input buffer.
1786 * Move it as the raw buffer and create a new input buffer
1787 */
1788 processed = ctxt->input->cur - ctxt->input->base;
1789
1790 ctxt->input->buf->raw = xmlBufferCreate();
1791 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1792 ctxt->input->length - processed);
1793 ctxt->input->buf->buffer = xmlBufferCreate();
1794
1795 /*
1796 * convert as much as possible of the raw input
1797 * to the parser reading buffer.
1798 */
1799 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1800 ctxt->input->buf->buffer,
1801 ctxt->input->buf->raw);
1802 if (nbchars < 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001803 xmlGenericError(xmlGenericErrorContext,
1804 "xmlSwitchToEncoding: encoder error\n");
Daniel Veillard04698d92000-09-17 16:00:22 +00001805 return(-1);
1806 }
1807
1808 /*
1809 * Conversion succeeded, get rid of the old buffer
1810 */
1811 if ((ctxt->input->free != NULL) &&
1812 (ctxt->input->base != NULL))
1813 ctxt->input->free((xmlChar *) ctxt->input->base);
1814 ctxt->input->base =
1815 ctxt->input->cur = ctxt->input->buf->buffer->content;
1816 }
1817 }
1818 } else {
1819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1820 ctxt->sax->error(ctxt->userData,
1821 "xmlSwitchEncoding : no input\n");
1822 return(-1);
1823 }
1824 /*
1825 * The parsing is now done in UTF8 natively
1826 */
1827 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1828 } else
1829 return(-1);
1830 return(0);
1831
1832}
1833
1834/************************************************************************
1835 * *
Daniel Veillardb1059e22000-09-16 14:02:43 +00001836 * Commodity functions to handle entities processing *
1837 * *
1838 ************************************************************************/
1839
1840/**
1841 * xmlFreeInputStream:
1842 * @input: an xmlParserInputPtr
1843 *
1844 * Free up an input stream.
1845 */
1846void
1847xmlFreeInputStream(xmlParserInputPtr input) {
1848 if (input == NULL) return;
1849
1850 if (input->filename != NULL) xmlFree((char *) input->filename);
1851 if (input->directory != NULL) xmlFree((char *) input->directory);
1852 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1853 if (input->version != NULL) xmlFree((char *) input->version);
1854 if ((input->free != NULL) && (input->base != NULL))
1855 input->free((xmlChar *) input->base);
1856 if (input->buf != NULL)
1857 xmlFreeParserInputBuffer(input->buf);
1858 memset(input, -1, sizeof(xmlParserInput));
1859 xmlFree(input);
1860}
1861
1862/**
1863 * xmlNewInputStream:
1864 * @ctxt: an XML parser context
1865 *
1866 * Create a new input stream structure
1867 * Returns the new input stream or NULL
1868 */
1869xmlParserInputPtr
1870xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1871 xmlParserInputPtr input;
1872
1873 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1874 if (input == NULL) {
1875 if (ctxt != NULL) {
1876 ctxt->errNo = XML_ERR_NO_MEMORY;
1877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1878 ctxt->sax->error(ctxt->userData,
1879 "malloc: couldn't allocate a new input stream\n");
1880 ctxt->errNo = XML_ERR_NO_MEMORY;
1881 }
1882 return(NULL);
1883 }
1884 memset(input, 0, sizeof(xmlParserInput));
1885 input->line = 1;
1886 input->col = 1;
1887 input->standalone = -1;
1888 return(input);
1889}
1890
1891/**
1892 * xmlNewIOInputStream:
1893 * @ctxt: an XML parser context
1894 * @input: an I/O Input
1895 * @enc: the charset encoding if known
1896 *
1897 * Create a new input stream structure encapsulating the @input into
1898 * a stream suitable for the parser.
1899 *
1900 * Returns the new input stream or NULL
1901 */
1902xmlParserInputPtr
1903xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1904 xmlCharEncoding enc) {
1905 xmlParserInputPtr inputStream;
1906
1907 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001908 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00001909 inputStream = xmlNewInputStream(ctxt);
1910 if (inputStream == NULL) {
1911 return(NULL);
1912 }
1913 inputStream->filename = NULL;
1914 inputStream->buf = input;
1915 inputStream->base = inputStream->buf->buffer->content;
1916 inputStream->cur = inputStream->buf->buffer->content;
1917 if (enc != XML_CHAR_ENCODING_NONE) {
1918 xmlSwitchEncoding(ctxt, enc);
1919 }
1920
1921 return(inputStream);
1922}
1923
1924/**
1925 * xmlNewEntityInputStream:
1926 * @ctxt: an XML parser context
1927 * @entity: an Entity pointer
1928 *
1929 * Create a new input stream based on an xmlEntityPtr
1930 *
1931 * Returns the new input stream or NULL
1932 */
1933xmlParserInputPtr
1934xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1935 xmlParserInputPtr input;
1936
1937 if (entity == NULL) {
1938 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1940 ctxt->sax->error(ctxt->userData,
1941 "internal: xmlNewEntityInputStream entity = NULL\n");
1942 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1943 return(NULL);
1944 }
1945 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001946 xmlGenericError(xmlGenericErrorContext,
1947 "new input from entity: %s\n", entity->name);
Daniel Veillardb1059e22000-09-16 14:02:43 +00001948 if (entity->content == NULL) {
1949 switch (entity->etype) {
1950 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1951 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData,
1954 "xmlNewEntityInputStream unparsed entity !\n");
1955 break;
1956 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1957 case XML_EXTERNAL_PARAMETER_ENTITY:
1958 return(xmlLoadExternalEntity((char *) entity->URI,
1959 (char *) entity->ExternalID, ctxt));
1960 case XML_INTERNAL_GENERAL_ENTITY:
1961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1962 ctxt->sax->error(ctxt->userData,
1963 "Internal entity %s without content !\n", entity->name);
1964 break;
1965 case XML_INTERNAL_PARAMETER_ENTITY:
1966 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1968 ctxt->sax->error(ctxt->userData,
1969 "Internal parameter entity %s without content !\n", entity->name);
1970 break;
1971 case XML_INTERNAL_PREDEFINED_ENTITY:
1972 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
1975 "Predefined entity %s without content !\n", entity->name);
1976 break;
1977 }
1978 return(NULL);
1979 }
1980 input = xmlNewInputStream(ctxt);
1981 if (input == NULL) {
1982 return(NULL);
1983 }
1984 input->filename = (char *) entity->URI;
1985 input->base = entity->content;
1986 input->cur = entity->content;
1987 input->length = entity->length;
1988 return(input);
1989}
1990
1991/**
1992 * xmlNewStringInputStream:
1993 * @ctxt: an XML parser context
1994 * @buffer: an memory buffer
1995 *
1996 * Create a new input stream based on a memory buffer.
1997 * Returns the new input stream
1998 */
1999xmlParserInputPtr
2000xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2001 xmlParserInputPtr input;
2002
2003 if (buffer == NULL) {
2004 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "internal: xmlNewStringInputStream string = NULL\n");
2008 return(NULL);
2009 }
2010 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002011 xmlGenericError(xmlGenericErrorContext,
2012 "new fixed input: %.30s\n", buffer);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002013 input = xmlNewInputStream(ctxt);
2014 if (input == NULL) {
2015 return(NULL);
2016 }
2017 input->base = buffer;
2018 input->cur = buffer;
2019 input->length = xmlStrlen(buffer);
2020 return(input);
2021}
2022
2023/**
2024 * xmlNewInputFromFile:
2025 * @ctxt: an XML parser context
2026 * @filename: the filename to use as entity
2027 *
2028 * Create a new input stream based on a file.
2029 *
2030 * Returns the new input stream or NULL in case of error
2031 */
2032xmlParserInputPtr
2033xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2034 xmlParserInputBufferPtr buf;
2035 xmlParserInputPtr inputStream;
2036 char *directory = NULL;
2037 xmlChar *URI = NULL;
2038
2039 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002040 xmlGenericError(xmlGenericErrorContext,
2041 "new input from file: %s\n", filename);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002042 if (ctxt == NULL) return(NULL);
2043 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2044 if (buf == NULL)
2045 return(NULL);
2046
2047 URI = xmlStrdup((xmlChar *) filename);
Daniel Veillard04698d92000-09-17 16:00:22 +00002048 directory = xmlParserGetDirectory((const char *) URI);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002049
2050 inputStream = xmlNewInputStream(ctxt);
2051 if (inputStream == NULL) {
2052 if (directory != NULL) xmlFree((char *) directory);
2053 if (URI != NULL) xmlFree((char *) URI);
2054 return(NULL);
2055 }
2056
Daniel Veillard04698d92000-09-17 16:00:22 +00002057 inputStream->filename = (const char *) URI;
Daniel Veillardb1059e22000-09-16 14:02:43 +00002058 inputStream->directory = directory;
2059 inputStream->buf = buf;
2060
2061 inputStream->base = inputStream->buf->buffer->content;
2062 inputStream->cur = inputStream->buf->buffer->content;
2063 if ((ctxt->directory == NULL) && (directory != NULL))
2064 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2065 return(inputStream);
2066}
2067
2068/************************************************************************
2069 * *
2070 * Commodity functions to handle parser contexts *
2071 * *
2072 ************************************************************************/
2073
2074/**
2075 * xmlInitParserCtxt:
2076 * @ctxt: an XML parser context
2077 *
2078 * Initialize a parser context
2079 */
2080
2081void
2082xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2083{
2084 xmlSAXHandler *sax;
2085
2086 xmlDefaultSAXHandlerInit();
2087
2088 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2089 if (sax == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002090 xmlGenericError(xmlGenericErrorContext,
2091 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002092 }
Daniel Veillard4fb87ee2000-09-19 12:25:59 +00002093 else
2094 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002095
2096 /* Allocate the Input stack */
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002097 ctxt->inputTab = (xmlParserInputPtr *)
2098 xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002099 if (ctxt->inputTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002100 xmlGenericError(xmlGenericErrorContext,
2101 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002102 ctxt->inputNr = 0;
2103 ctxt->inputMax = 0;
2104 ctxt->input = NULL;
2105 return;
2106 }
2107 ctxt->inputNr = 0;
2108 ctxt->inputMax = 5;
2109 ctxt->input = NULL;
2110
2111 ctxt->version = NULL;
2112 ctxt->encoding = NULL;
2113 ctxt->standalone = -1;
2114 ctxt->hasExternalSubset = 0;
2115 ctxt->hasPErefs = 0;
2116 ctxt->html = 0;
2117 ctxt->external = 0;
2118 ctxt->instate = XML_PARSER_START;
2119 ctxt->token = 0;
2120 ctxt->directory = NULL;
2121
2122 /* Allocate the Node stack */
2123 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2124 if (ctxt->nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002125 xmlGenericError(xmlGenericErrorContext,
2126 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002127 ctxt->nodeNr = 0;
2128 ctxt->nodeMax = 0;
2129 ctxt->node = NULL;
2130 ctxt->inputNr = 0;
2131 ctxt->inputMax = 0;
2132 ctxt->input = NULL;
2133 return;
2134 }
2135 ctxt->nodeNr = 0;
2136 ctxt->nodeMax = 10;
2137 ctxt->node = NULL;
2138
2139 /* Allocate the Name stack */
2140 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2141 if (ctxt->nameTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002142 xmlGenericError(xmlGenericErrorContext,
2143 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002144 ctxt->nodeNr = 0;
2145 ctxt->nodeMax = 0;
2146 ctxt->node = NULL;
2147 ctxt->inputNr = 0;
2148 ctxt->inputMax = 0;
2149 ctxt->input = NULL;
2150 ctxt->nameNr = 0;
2151 ctxt->nameMax = 0;
2152 ctxt->name = NULL;
2153 return;
2154 }
2155 ctxt->nameNr = 0;
2156 ctxt->nameMax = 10;
2157 ctxt->name = NULL;
2158
2159 /* Allocate the space stack */
2160 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2161 if (ctxt->spaceTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002162 xmlGenericError(xmlGenericErrorContext,
2163 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002164 ctxt->nodeNr = 0;
2165 ctxt->nodeMax = 0;
2166 ctxt->node = NULL;
2167 ctxt->inputNr = 0;
2168 ctxt->inputMax = 0;
2169 ctxt->input = NULL;
2170 ctxt->nameNr = 0;
2171 ctxt->nameMax = 0;
2172 ctxt->name = NULL;
2173 ctxt->spaceNr = 0;
2174 ctxt->spaceMax = 0;
2175 ctxt->space = NULL;
2176 return;
2177 }
2178 ctxt->spaceNr = 1;
2179 ctxt->spaceMax = 10;
2180 ctxt->spaceTab[0] = -1;
2181 ctxt->space = &ctxt->spaceTab[0];
2182
2183 if (sax == NULL) {
2184 ctxt->sax = &xmlDefaultSAXHandler;
2185 } else {
2186 ctxt->sax = sax;
2187 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2188 }
2189 ctxt->userData = ctxt;
2190 ctxt->myDoc = NULL;
2191 ctxt->wellFormed = 1;
2192 ctxt->valid = 1;
Daniel Veillard0f2a53c2001-02-05 17:57:33 +00002193 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
Daniel Veillardb1059e22000-09-16 14:02:43 +00002194 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2195 ctxt->pedantic = xmlPedanticParserDefaultValue;
2196 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2197 ctxt->vctxt.userData = ctxt;
2198 if (ctxt->validate) {
2199 ctxt->vctxt.error = xmlParserValidityError;
2200 if (xmlGetWarningsDefaultValue == 0)
2201 ctxt->vctxt.warning = NULL;
2202 else
2203 ctxt->vctxt.warning = xmlParserValidityWarning;
2204 /* Allocate the Node stack */
2205 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2206 if (ctxt->vctxt.nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002207 xmlGenericError(xmlGenericErrorContext,
2208 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002209 ctxt->vctxt.nodeMax = 0;
2210 ctxt->validate = 0;
2211 ctxt->vctxt.error = NULL;
2212 ctxt->vctxt.warning = NULL;
2213 } else {
2214 ctxt->vctxt.nodeNr = 0;
2215 ctxt->vctxt.nodeMax = 4;
2216 ctxt->vctxt.node = NULL;
2217 }
2218 } else {
2219 ctxt->vctxt.error = NULL;
2220 ctxt->vctxt.warning = NULL;
2221 }
2222 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2223 ctxt->record_info = 0;
2224 ctxt->nbChars = 0;
2225 ctxt->checkIndex = 0;
2226 ctxt->inSubset = 0;
2227 ctxt->errNo = XML_ERR_OK;
2228 ctxt->depth = 0;
2229 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2230 xmlInitNodeInfoSeq(&ctxt->node_seq);
2231}
2232
2233/**
2234 * xmlFreeParserCtxt:
2235 * @ctxt: an XML parser context
2236 *
2237 * Free all the memory used by a parser context. However the parsed
2238 * document in ctxt->myDoc is not freed.
2239 */
2240
2241void
2242xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2243{
2244 xmlParserInputPtr input;
2245 xmlChar *oldname;
2246
2247 if (ctxt == NULL) return;
2248
2249 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2250 xmlFreeInputStream(input);
2251 }
2252 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2253 xmlFree(oldname);
2254 }
2255 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2256 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2257 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2258 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2259 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2260 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2261 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2262 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2263 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2264 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2265 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2266 xmlFree(ctxt->sax);
2267 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2268 xmlFree(ctxt);
2269}
2270
2271/**
2272 * xmlNewParserCtxt:
2273 *
2274 * Allocate and initialize a new parser context.
2275 *
2276 * Returns the xmlParserCtxtPtr or NULL
2277 */
2278
2279xmlParserCtxtPtr
2280xmlNewParserCtxt()
2281{
2282 xmlParserCtxtPtr ctxt;
2283
2284 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2285 if (ctxt == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002286 xmlGenericError(xmlGenericErrorContext,
2287 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002288 perror("malloc");
2289 return(NULL);
2290 }
2291 memset(ctxt, 0, sizeof(xmlParserCtxt));
2292 xmlInitParserCtxt(ctxt);
2293 return(ctxt);
2294}
2295
/************************************************************************
 *                                                                      *
 *              Handling of node information                            *
 *                                                                      *
 ************************************************************************/
2301
2302/**
2303 * xmlClearParserCtxt:
2304 * @ctxt: an XML parser context
2305 *
2306 * Clear (release owned resources) and reinitialize a parser context
2307 */
2308
2309void
2310xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2311{
2312 xmlClearNodeInfoSeq(&ctxt->node_seq);
2313 xmlInitParserCtxt(ctxt);
2314}
2315
2316/**
2317 * xmlParserFindNodeInfo:
2318 * @ctxt: an XML parser context
2319 * @node: an XML node within the tree
2320 *
2321 * Find the parser node info struct for a given node
2322 *
2323 * Returns an xmlParserNodeInfo block pointer or NULL
2324 */
2325const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2326 const xmlNode* node)
2327{
2328 unsigned long pos;
2329
2330 /* Find position where node should be at */
2331 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2332 if ( ctx->node_seq.buffer[pos].node == node )
2333 return &ctx->node_seq.buffer[pos];
2334 else
2335 return NULL;
2336}
2337
2338
2339/**
2340 * xmlInitNodeInfoSeq:
2341 * @seq: a node info sequence pointer
2342 *
2343 * -- Initialize (set to initial state) node info sequence
2344 */
2345void
2346xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2347{
2348 seq->length = 0;
2349 seq->maximum = 0;
2350 seq->buffer = NULL;
2351}
2352
2353/**
2354 * xmlClearNodeInfoSeq:
2355 * @seq: a node info sequence pointer
2356 *
2357 * -- Clear (release memory and reinitialize) node
2358 * info sequence
2359 */
2360void
2361xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2362{
2363 if ( seq->buffer != NULL )
2364 xmlFree(seq->buffer);
2365 xmlInitNodeInfoSeq(seq);
2366}
2367
2368
2369/**
2370 * xmlParserFindNodeInfoIndex:
2371 * @seq: a node info sequence pointer
2372 * @node: an XML node pointer
2373 *
2374 *
2375 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2376 * the given node is or should be at in a sorted sequence
2377 *
2378 * Returns a long indicating the position of the record
2379 */
2380unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2381 const xmlNode* node)
2382{
2383 unsigned long upper, lower, middle;
2384 int found = 0;
2385
2386 /* Do a binary search for the key */
2387 lower = 1;
2388 upper = seq->length;
2389 middle = 0;
2390 while ( lower <= upper && !found) {
2391 middle = lower + (upper - lower) / 2;
2392 if ( node == seq->buffer[middle - 1].node )
2393 found = 1;
2394 else if ( node < seq->buffer[middle - 1].node )
2395 upper = middle - 1;
2396 else
2397 lower = middle + 1;
2398 }
2399
2400 /* Return position */
2401 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2402 return middle;
2403 else
2404 return middle - 1;
2405}
2406
2407
2408/**
2409 * xmlParserAddNodeInfo:
2410 * @ctxt: an XML parser context
2411 * @info: a node info sequence pointer
2412 *
2413 * Insert node info record into the sorted sequence
2414 */
2415void
2416xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2417 const xmlParserNodeInfo* info)
2418{
2419 unsigned long pos;
2420 static unsigned int block_size = 5;
2421
2422 /* Find pos and check to see if node is already in the sequence */
2423 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2424 if ( pos < ctxt->node_seq.length
2425 && ctxt->node_seq.buffer[pos].node == info->node ) {
2426 ctxt->node_seq.buffer[pos] = *info;
2427 }
2428
2429 /* Otherwise, we need to add new node to buffer */
2430 else {
2431 /* Expand buffer by 5 if needed */
2432 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2433 xmlParserNodeInfo* tmp_buffer;
2434 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2435 *(ctxt->node_seq.maximum + block_size));
2436
2437 if ( ctxt->node_seq.buffer == NULL )
2438 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2439 else
2440 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2441
2442 if ( tmp_buffer == NULL ) {
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2445 ctxt->errNo = XML_ERR_NO_MEMORY;
2446 return;
2447 }
2448 ctxt->node_seq.buffer = tmp_buffer;
2449 ctxt->node_seq.maximum += block_size;
2450 }
2451
2452 /* If position is not at end, move elements out of the way */
2453 if ( pos != ctxt->node_seq.length ) {
2454 unsigned long i;
2455
2456 for ( i = ctxt->node_seq.length; i > pos; i-- )
2457 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2458 }
2459
2460 /* Copy element and increase length */
2461 ctxt->node_seq.buffer[pos] = *info;
2462 ctxt->node_seq.length++;
2463 }
2464}
2465
2466/************************************************************************
2467 * *
2468 * Deprecated functions kept for compatibility *
2469 * *
2470 ************************************************************************/
2471
2472/*
2473 * xmlCheckLanguageID
2474 * @lang: pointer to the string value
2475 *
2476 * Checks that the value conforms to the LanguageID production:
2477 *
2478 * NOTE: this is somewhat deprecated, those productions were removed from
2479 * the XML Second edition.
2480 *
2481 * [33] LanguageID ::= Langcode ('-' Subcode)*
2482 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2483 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2484 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2485 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2486 * [38] Subcode ::= ([a-z] | [A-Z])+
2487 *
2488 * Returns 1 if correct 0 otherwise
2489 **/
2490int
2491xmlCheckLanguageID(const xmlChar *lang) {
2492 const xmlChar *cur = lang;
2493
2494 if (cur == NULL)
2495 return(0);
2496 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2497 ((cur[0] == 'I') && (cur[1] == '-'))) {
2498 /*
2499 * IANA code
2500 */
2501 cur += 2;
2502 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2503 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2504 cur++;
2505 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2506 ((cur[0] == 'X') && (cur[1] == '-'))) {
2507 /*
2508 * User code
2509 */
2510 cur += 2;
2511 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2512 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2513 cur++;
2514 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2515 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2516 /*
2517 * ISO639
2518 */
2519 cur++;
2520 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2521 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2522 cur++;
2523 else
2524 return(0);
2525 } else
2526 return(0);
2527 while (cur[0] != 0) { /* non input consuming */
2528 if (cur[0] != '-')
2529 return(0);
2530 cur++;
2531 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2532 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2533 cur++;
2534 else
2535 return(0);
2536 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2537 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2538 cur++;
2539 }
2540 return(1);
2541}
2542
2543/**
2544 * xmlDecodeEntities:
2545 * @ctxt: the parser context
2546 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2547 * @len: the len to decode (in bytes !), -1 for no size limit
2548 * @end: an end marker xmlChar, 0 if none
2549 * @end2: an end marker xmlChar, 0 if none
2550 * @end3: an end marker xmlChar, 0 if none
2551 *
2552 * This function is deprecated, we now always process entities content
2553 * through xmlStringDecodeEntities
2554 *
2555 * TODO: remove it in next major release.
2556 *
2557 * [67] Reference ::= EntityRef | CharRef
2558 *
2559 * [69] PEReference ::= '%' Name ';'
2560 *
2561 * Returns A newly allocated string with the substitution done. The caller
2562 * must deallocate it !
2563 */
2564xmlChar *
2565xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2566 xmlChar end, xmlChar end2, xmlChar end3) {
2567#if 0
2568 xmlChar *buffer = NULL;
2569 unsigned int buffer_size = 0;
2570 unsigned int nbchars = 0;
2571
2572 xmlChar *current = NULL;
2573 xmlEntityPtr ent;
2574 unsigned int max = (unsigned int) len;
2575 int c,l;
2576#endif
2577
2578 static int deprecated = 0;
2579 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002580 xmlGenericError(xmlGenericErrorContext,
2581 "xmlDecodeEntities() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002582 deprecated = 1;
2583 }
2584
2585#if 0
2586 if (ctxt->depth > 40) {
2587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588 ctxt->sax->error(ctxt->userData,
2589 "Detected entity reference loop\n");
2590 ctxt->wellFormed = 0;
2591 ctxt->disableSAX = 1;
2592 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2593 return(NULL);
2594 }
2595
2596 /*
2597 * allocate a translation buffer.
2598 */
2599 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2600 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2601 if (buffer == NULL) {
2602 perror("xmlDecodeEntities: malloc failed");
2603 return(NULL);
2604 }
2605
2606 /*
2607 * Ok loop until we reach one of the ending char or a size limit.
2608 */
2609 GROW;
2610 c = CUR_CHAR(l);
2611 while ((nbchars < max) && (c != end) && /* NOTUSED */
2612 (c != end2) && (c != end3)) {
2613 GROW;
2614 if (c == 0) break;
2615 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2616 int val = xmlParseCharRef(ctxt);
2617 COPY_BUF(0,buffer,nbchars,val);
2618 NEXTL(l);
2619 } else if ((c == '&') && (ctxt->token != '&') &&
2620 (what & XML_SUBSTITUTE_REF)) {
2621 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002622 xmlGenericError(xmlGenericErrorContext,
2623 "decoding Entity Reference\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002624 ent = xmlParseEntityRef(ctxt);
2625 if ((ent != NULL) &&
2626 (ctxt->replaceEntities != 0)) {
2627 current = ent->content;
2628 while (*current != 0) { /* non input consuming loop */
2629 buffer[nbchars++] = *current++;
2630 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2631 growBuffer(buffer);
2632 }
2633 }
2634 } else if (ent != NULL) {
2635 const xmlChar *cur = ent->name;
2636
2637 buffer[nbchars++] = '&';
2638 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2639 growBuffer(buffer);
2640 }
2641 while (*cur != 0) { /* non input consuming loop */
2642 buffer[nbchars++] = *cur++;
2643 }
2644 buffer[nbchars++] = ';';
2645 }
2646 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2647 /*
2648 * a PEReference induce to switch the entity flow,
2649 * we break here to flush the current set of chars
2650 * parsed if any. We will be called back later.
2651 */
2652 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002653 xmlGenericError(xmlGenericErrorContext,
2654 "decoding PE Reference\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002655 if (nbchars != 0) break;
2656
2657 xmlParsePEReference(ctxt);
2658
2659 /*
2660 * Pop-up of finished entities.
2661 */
2662 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2663 xmlPopInput(ctxt);
2664
2665 break;
2666 } else {
2667 COPY_BUF(l,buffer,nbchars,c);
2668 NEXTL(l);
2669 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2670 growBuffer(buffer);
2671 }
2672 }
2673 c = CUR_CHAR(l);
2674 }
2675 buffer[nbchars++] = 0;
2676 return(buffer);
2677#endif
2678 return(NULL);
2679}
2680
2681/**
2682 * xmlNamespaceParseNCName:
2683 * @ctxt: an XML parser context
2684 *
2685 * parse an XML namespace name.
2686 *
2687 * TODO: this seems not in use anymore, the namespace handling is done on
2688 * top of the SAX interfaces, i.e. not on raw input.
2689 *
2690 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2691 *
2692 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2693 * CombiningChar | Extender
2694 *
2695 * Returns the namespace name or NULL
2696 */
2697
2698xmlChar *
2699xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2700#if 0
2701 xmlChar buf[XML_MAX_NAMELEN + 5];
2702 int len = 0, l;
2703 int cur = CUR_CHAR(l);
2704#endif
2705
2706 static int deprecated = 0;
2707 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002708 xmlGenericError(xmlGenericErrorContext,
2709 "xmlNamespaceParseNCName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002710 deprecated = 1;
2711 }
2712
2713#if 0
2714 /* load first the value of the char !!! */
2715 GROW;
2716 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2717
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002718xmlGenericError(xmlGenericErrorContext,
2719 "xmlNamespaceParseNCName: reached loop 3\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002720 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2721 (cur == '.') || (cur == '-') ||
2722 (cur == '_') ||
2723 (IS_COMBINING(cur)) ||
2724 (IS_EXTENDER(cur))) {
2725 COPY_BUF(l,buf,len,cur);
2726 NEXTL(l);
2727 cur = CUR_CHAR(l);
2728 if (len >= XML_MAX_NAMELEN) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002729 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +00002730 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2731 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2732 (cur == '.') || (cur == '-') ||
2733 (cur == '_') ||
2734 (IS_COMBINING(cur)) ||
2735 (IS_EXTENDER(cur))) {
2736 NEXTL(l);
2737 cur = CUR_CHAR(l);
2738 }
2739 break;
2740 }
2741 }
2742 return(xmlStrndup(buf, len));
2743#endif
2744 return(NULL);
2745}
2746
2747/**
2748 * xmlNamespaceParseQName:
2749 * @ctxt: an XML parser context
2750 * @prefix: a xmlChar **
2751 *
2752 * TODO: this seems not in use anymore, the namespace handling is done on
2753 * top of the SAX interfaces, i.e. not on raw input.
2754 *
2755 * parse an XML qualified name
2756 *
2757 * [NS 5] QName ::= (Prefix ':')? LocalPart
2758 *
2759 * [NS 6] Prefix ::= NCName
2760 *
2761 * [NS 7] LocalPart ::= NCName
2762 *
2763 * Returns the local part, and prefix is updated
2764 * to get the Prefix if any.
2765 */
2766
2767xmlChar *
2768xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2769
2770 static int deprecated = 0;
2771 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002772 xmlGenericError(xmlGenericErrorContext,
2773 "xmlNamespaceParseQName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002774 deprecated = 1;
2775 }
2776
2777#if 0
2778 xmlChar *ret = NULL;
2779
2780 *prefix = NULL;
2781 ret = xmlNamespaceParseNCName(ctxt);
2782 if (RAW == ':') {
2783 *prefix = ret;
2784 NEXT;
2785 ret = xmlNamespaceParseNCName(ctxt);
2786 }
2787
2788 return(ret);
2789#endif
2790 return(NULL);
2791}
2792
2793/**
2794 * xmlNamespaceParseNSDef:
2795 * @ctxt: an XML parser context
2796 *
2797 * parse a namespace prefix declaration
2798 *
2799 * TODO: this seems not in use anymore, the namespace handling is done on
2800 * top of the SAX interfaces, i.e. not on raw input.
2801 *
2802 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2803 *
2804 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2805 *
2806 * Returns the namespace name
2807 */
2808
2809xmlChar *
2810xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2811 static int deprecated = 0;
2812 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002813 xmlGenericError(xmlGenericErrorContext,
2814 "xmlNamespaceParseNSDef() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002815 deprecated = 1;
2816 }
2817 return(NULL);
2818#if 0
2819 xmlChar *name = NULL;
2820
2821 if ((RAW == 'x') && (NXT(1) == 'm') &&
2822 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2823 (NXT(4) == 's')) {
2824 SKIP(5);
2825 if (RAW == ':') {
2826 NEXT;
2827 name = xmlNamespaceParseNCName(ctxt);
2828 }
2829 }
2830 return(name);
2831#endif
2832}
2833
2834/**
2835 * xmlParseQuotedString:
2836 * @ctxt: an XML parser context
2837 *
2838 * Parse and return a string between quotes or doublequotes
2839 *
2840 * TODO: Deprecated, to be removed at next drop of binary compatibility
2841 *
2842 * Returns the string parser or NULL.
2843 */
2844xmlChar *
2845xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2846 static int deprecated = 0;
2847 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002848 xmlGenericError(xmlGenericErrorContext,
2849 "xmlParseQuotedString() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002850 deprecated = 1;
2851 }
2852 return(NULL);
2853
2854#if 0
2855 xmlChar *buf = NULL;
2856 int len = 0,l;
2857 int size = XML_PARSER_BUFFER_SIZE;
2858 int c;
2859
2860 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2861 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002862 xmlGenericError(xmlGenericErrorContext,
2863 "malloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002864 return(NULL);
2865 }
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002866xmlGenericError(xmlGenericErrorContext,
2867 "xmlParseQuotedString: reached loop 4\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002868 if (RAW == '"') {
2869 NEXT;
2870 c = CUR_CHAR(l);
2871 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2872 if (len + 5 >= size) {
2873 size *= 2;
2874 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2875 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002876 xmlGenericError(xmlGenericErrorContext,
2877 "realloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002878 return(NULL);
2879 }
2880 }
2881 COPY_BUF(l,buf,len,c);
2882 NEXTL(l);
2883 c = CUR_CHAR(l);
2884 }
2885 if (c != '"') {
2886 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "String not closed \"%.50s\"\n", buf);
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 } else {
2893 NEXT;
2894 }
2895 } else if (RAW == '\''){
2896 NEXT;
2897 c = CUR;
2898 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2899 if (len + 1 >= size) {
2900 size *= 2;
2901 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2902 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002903 xmlGenericError(xmlGenericErrorContext,
2904 "realloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002905 return(NULL);
2906 }
2907 }
2908 buf[len++] = c;
2909 NEXT;
2910 c = CUR;
2911 }
2912 if (RAW != '\'') {
2913 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2915 ctxt->sax->error(ctxt->userData,
2916 "String not closed \"%.50s\"\n", buf);
2917 ctxt->wellFormed = 0;
2918 ctxt->disableSAX = 1;
2919 } else {
2920 NEXT;
2921 }
2922 }
2923 return(buf);
2924#endif
2925}
2926
2927/**
2928 * xmlParseNamespace:
2929 * @ctxt: an XML parser context
2930 *
2931 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2932 *
2933 * This is what the older xml-name Working Draft specified, a bunch of
2934 * other stuff may still rely on it, so support is still here as
2935 * if it was declared on the root of the Tree:-(
2936 *
2937 * TODO: remove from library
2938 *
2939 * To be removed at next drop of binary compatibility
2940 */
2941
2942void
2943xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2944 static int deprecated = 0;
2945 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002946 xmlGenericError(xmlGenericErrorContext,
2947 "xmlParseNamespace() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002948 deprecated = 1;
2949 }
2950
2951#if 0
2952 xmlChar *href = NULL;
2953 xmlChar *prefix = NULL;
2954 int garbage = 0;
2955
2956 /*
2957 * We just skipped "namespace" or "xml:namespace"
2958 */
2959 SKIP_BLANKS;
2960
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002961xmlGenericError(xmlGenericErrorContext,
2962 "xmlParseNamespace: reached loop 5\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002963 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2964 /*
2965 * We can have "ns" or "prefix" attributes
2966 * Old encoding as 'href' or 'AS' attributes is still supported
2967 */
2968 if ((RAW == 'n') && (NXT(1) == 's')) {
2969 garbage = 0;
2970 SKIP(2);
2971 SKIP_BLANKS;
2972
2973 if (RAW != '=') continue;
2974 NEXT;
2975 SKIP_BLANKS;
2976
2977 href = xmlParseQuotedString(ctxt);
2978 SKIP_BLANKS;
2979 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2980 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2981 garbage = 0;
2982 SKIP(4);
2983 SKIP_BLANKS;
2984
2985 if (RAW != '=') continue;
2986 NEXT;
2987 SKIP_BLANKS;
2988
2989 href = xmlParseQuotedString(ctxt);
2990 SKIP_BLANKS;
2991 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2992 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2993 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2994 garbage = 0;
2995 SKIP(6);
2996 SKIP_BLANKS;
2997
2998 if (RAW != '=') continue;
2999 NEXT;
3000 SKIP_BLANKS;
3001
3002 prefix = xmlParseQuotedString(ctxt);
3003 SKIP_BLANKS;
3004 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3005 garbage = 0;
3006 SKIP(2);
3007 SKIP_BLANKS;
3008
3009 if (RAW != '=') continue;
3010 NEXT;
3011 SKIP_BLANKS;
3012
3013 prefix = xmlParseQuotedString(ctxt);
3014 SKIP_BLANKS;
3015 } else if ((RAW == '?') && (NXT(1) == '>')) {
3016 garbage = 0;
3017 NEXT;
3018 } else {
3019 /*
3020 * Found garbage when parsing the namespace
3021 */
3022 if (!garbage) {
3023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3024 ctxt->sax->error(ctxt->userData,
3025 "xmlParseNamespace found garbage\n");
3026 }
3027 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 NEXT;
3031 }
3032 }
3033
3034 MOVETO_ENDTAG(CUR_PTR);
3035 NEXT;
3036
3037 /*
3038 * Register the DTD.
3039 if (href != NULL)
3040 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3041 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3042 */
3043
3044 if (prefix != NULL) xmlFree(prefix);
3045 if (href != NULL) xmlFree(href);
3046#endif
3047}
3048
3049/**
3050 * xmlScanName:
3051 * @ctxt: an XML parser context
3052 *
3053 * Trickery: parse an XML name but without consuming the input flow
3054 * Needed for rollback cases. Used only when parsing entities references.
3055 *
3056 * TODO: seems deprecated now, only used in the default part of
3057 * xmlParserHandleReference
3058 *
3059 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3060 * CombiningChar | Extender
3061 *
3062 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3063 *
3064 * [6] Names ::= Name (S Name)*
3065 *
3066 * Returns the Name parsed or NULL
3067 */
3068
3069xmlChar *
3070xmlScanName(xmlParserCtxtPtr ctxt) {
3071 static int deprecated = 0;
3072 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003073 xmlGenericError(xmlGenericErrorContext,
3074 "xmlScanName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003075 deprecated = 1;
3076 }
3077 return(NULL);
3078
3079#if 0
3080 xmlChar buf[XML_MAX_NAMELEN];
3081 int len = 0;
3082
3083 GROW;
3084 if (!IS_LETTER(RAW) && (RAW != '_') &&
3085 (RAW != ':')) {
3086 return(NULL);
3087 }
3088
3089
3090 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3091 (NXT(len) == '.') || (NXT(len) == '-') ||
3092 (NXT(len) == '_') || (NXT(len) == ':') ||
3093 (IS_COMBINING(NXT(len))) ||
3094 (IS_EXTENDER(NXT(len)))) {
3095 GROW;
3096 buf[len] = NXT(len);
3097 len++;
3098 if (len >= XML_MAX_NAMELEN) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003099 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +00003100 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3101 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3102 (IS_DIGIT(NXT(len))) ||
3103 (NXT(len) == '.') || (NXT(len) == '-') ||
3104 (NXT(len) == '_') || (NXT(len) == ':') ||
3105 (IS_COMBINING(NXT(len))) ||
3106 (IS_EXTENDER(NXT(len))))
3107 len++;
3108 break;
3109 }
3110 }
3111 return(xmlStrndup(buf, len));
3112#endif
3113}
3114
3115/**
3116 * xmlParserHandleReference:
3117 * @ctxt: the parser context
3118 *
3119 * TODO: Remove, now deprecated ... the test is done directly in the
3120 * content parsing
3121 * routines.
3122 *
3123 * [67] Reference ::= EntityRef | CharRef
3124 *
3125 * [68] EntityRef ::= '&' Name ';'
3126 *
3127 * [ WFC: Entity Declared ]
3128 * the Name given in the entity reference must match that in an entity
3129 * declaration, except that well-formed documents need not declare any
3130 * of the following entities: amp, lt, gt, apos, quot.
3131 *
3132 * [ WFC: Parsed Entity ]
3133 * An entity reference must not contain the name of an unparsed entity
3134 *
3135 * [66] CharRef ::= '&#' [0-9]+ ';' |
3136 * '&#x' [0-9a-fA-F]+ ';'
3137 *
3138 * A PEReference may have been detectect in the current input stream
3139 * the handling is done accordingly to
3140 * http://www.w3.org/TR/REC-xml#entproc
3141 */
3142void
3143xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3144 static int deprecated = 0;
3145 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003146 xmlGenericError(xmlGenericErrorContext,
3147 "xmlParserHandleReference() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003148 deprecated = 1;
3149 }
3150
3151#if 0
3152 xmlParserInputPtr input;
3153 xmlChar *name;
3154 xmlEntityPtr ent = NULL;
3155
3156 if (ctxt->token != 0) {
3157 return;
3158 }
3159 if (RAW != '&') return;
3160 GROW;
3161 if ((RAW == '&') && (NXT(1) == '#')) {
3162 switch(ctxt->instate) {
3163 case XML_PARSER_ENTITY_DECL:
3164 case XML_PARSER_PI:
3165 case XML_PARSER_CDATA_SECTION:
3166 case XML_PARSER_COMMENT:
3167 case XML_PARSER_SYSTEM_LITERAL:
3168 /* we just ignore it there */
3169 return;
3170 case XML_PARSER_START_TAG:
3171 return;
3172 case XML_PARSER_END_TAG:
3173 return;
3174 case XML_PARSER_EOF:
3175 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3178 ctxt->wellFormed = 0;
3179 ctxt->disableSAX = 1;
3180 return;
3181 case XML_PARSER_PROLOG:
3182 case XML_PARSER_START:
3183 case XML_PARSER_MISC:
3184 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3186 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3187 ctxt->wellFormed = 0;
3188 ctxt->disableSAX = 1;
3189 return;
3190 case XML_PARSER_EPILOG:
3191 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3193 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3194 ctxt->wellFormed = 0;
3195 ctxt->disableSAX = 1;
3196 return;
3197 case XML_PARSER_DTD:
3198 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "CharRef are forbiden in DTDs!\n");
3202 ctxt->wellFormed = 0;
3203 ctxt->disableSAX = 1;
3204 return;
3205 case XML_PARSER_ENTITY_VALUE:
3206 /*
3207 * NOTE: in the case of entity values, we don't do the
3208 * substitution here since we need the literal
3209 * entity value to be able to save the internal
3210 * subset of the document.
3211 * This will be handled by xmlStringDecodeEntities
3212 */
3213 return;
3214 case XML_PARSER_CONTENT:
3215 return;
3216 case XML_PARSER_ATTRIBUTE_VALUE:
3217 /* ctxt->token = xmlParseCharRef(ctxt); */
3218 return;
Daniel Veillard41e06512000-11-13 11:47:47 +00003219 case XML_PARSER_IGNORE:
3220 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00003221 }
3222 return;
3223 }
3224
3225 switch(ctxt->instate) {
3226 case XML_PARSER_CDATA_SECTION:
3227 return;
3228 case XML_PARSER_PI:
3229 case XML_PARSER_COMMENT:
3230 case XML_PARSER_SYSTEM_LITERAL:
3231 case XML_PARSER_CONTENT:
3232 return;
3233 case XML_PARSER_START_TAG:
3234 return;
3235 case XML_PARSER_END_TAG:
3236 return;
3237 case XML_PARSER_EOF:
3238 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3240 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3241 ctxt->wellFormed = 0;
3242 ctxt->disableSAX = 1;
3243 return;
3244 case XML_PARSER_PROLOG:
3245 case XML_PARSER_START:
3246 case XML_PARSER_MISC:
3247 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3249 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3250 ctxt->wellFormed = 0;
3251 ctxt->disableSAX = 1;
3252 return;
3253 case XML_PARSER_EPILOG:
3254 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3257 ctxt->wellFormed = 0;
3258 ctxt->disableSAX = 1;
3259 return;
3260 case XML_PARSER_ENTITY_VALUE:
3261 /*
3262 * NOTE: in the case of entity values, we don't do the
3263 * substitution here since we need the literal
3264 * entity value to be able to save the internal
3265 * subset of the document.
3266 * This will be handled by xmlStringDecodeEntities
3267 */
3268 return;
3269 case XML_PARSER_ATTRIBUTE_VALUE:
3270 /*
3271 * NOTE: in the case of attributes values, we don't do the
3272 * substitution here unless we are in a mode where
3273 * the parser is explicitely asked to substitute
3274 * entities. The SAX callback is called with values
3275 * without entity substitution.
3276 * This will then be handled by xmlStringDecodeEntities
3277 */
3278 return;
3279 case XML_PARSER_ENTITY_DECL:
3280 /*
3281 * we just ignore it there
3282 * the substitution will be done once the entity is referenced
3283 */
3284 return;
3285 case XML_PARSER_DTD:
3286 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3288 ctxt->sax->error(ctxt->userData,
3289 "Entity references are forbiden in DTDs!\n");
3290 ctxt->wellFormed = 0;
3291 ctxt->disableSAX = 1;
3292 return;
Daniel Veillard41e06512000-11-13 11:47:47 +00003293 case XML_PARSER_IGNORE:
3294 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00003295 }
3296
3297/* TODO: this seems not reached anymore .... Verify ... */
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003298xmlGenericError(xmlGenericErrorContext,
3299 "Reached deprecated section in xmlParserHandleReference()\n");
3300xmlGenericError(xmlGenericErrorContext,
3301 "Please forward the document to Daniel.Veillard@w3.org\n");
3302xmlGenericError(xmlGenericErrorContext,
3303 "indicating the version: %s, thanks !\n", xmlParserVersion);
Daniel Veillardb1059e22000-09-16 14:02:43 +00003304 NEXT;
3305 name = xmlScanName(ctxt);
3306 if (name == NULL) {
3307 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3309 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3310 ctxt->wellFormed = 0;
3311 ctxt->disableSAX = 1;
3312 ctxt->token = '&';
3313 return;
3314 }
3315 if (NXT(xmlStrlen(name)) != ';') {
3316 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3318 ctxt->sax->error(ctxt->userData,
3319 "Entity reference: ';' expected\n");
3320 ctxt->wellFormed = 0;
3321 ctxt->disableSAX = 1;
3322 ctxt->token = '&';
3323 xmlFree(name);
3324 return;
3325 }
3326 SKIP(xmlStrlen(name) + 1);
3327 if (ctxt->sax != NULL) {
3328 if (ctxt->sax->getEntity != NULL)
3329 ent = ctxt->sax->getEntity(ctxt->userData, name);
3330 }
3331
3332 /*
3333 * [ WFC: Entity Declared ]
3334 * the Name given in the entity reference must match that in an entity
3335 * declaration, except that well-formed documents need not declare any
3336 * of the following entities: amp, lt, gt, apos, quot.
3337 */
3338 if (ent == NULL)
3339 ent = xmlGetPredefinedEntity(name);
3340 if (ent == NULL) {
3341 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData,
3344 "Entity reference: entity %s not declared\n",
3345 name);
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 xmlFree(name);
3349 return;
3350 }
3351
3352 /*
3353 * [ WFC: Parsed Entity ]
3354 * An entity reference must not contain the name of an unparsed entity
3355 */
3356 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3357 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3359 ctxt->sax->error(ctxt->userData,
3360 "Entity reference to unparsed entity %s\n", name);
3361 ctxt->wellFormed = 0;
3362 ctxt->disableSAX = 1;
3363 }
3364
3365 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3366 ctxt->token = ent->content[0];
3367 xmlFree(name);
3368 return;
3369 }
3370 input = xmlNewEntityInputStream(ctxt, ent);
3371 xmlPushInput(ctxt, input);
3372 xmlFree(name);
3373#endif
3374 return;
3375}
3376
3377/**
3378 * xmlHandleEntity:
3379 * @ctxt: an XML parser context
3380 * @entity: an XML entity pointer.
3381 *
3382 * Default handling of defined entities, when should we define a new input
3383 * stream ? When do we just handle that as a set of chars ?
3384 *
3385 * OBSOLETE: to be removed at some point.
3386 */
3387
3388void
3389xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3390 static int deprecated = 0;
3391 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003392 xmlGenericError(xmlGenericErrorContext,
3393 "xmlHandleEntity() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003394 deprecated = 1;
3395 }
3396
3397#if 0
3398 int len;
3399 xmlParserInputPtr input;
3400
3401 if (entity->content == NULL) {
3402 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3404 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3405 entity->name);
3406 ctxt->wellFormed = 0;
3407 ctxt->disableSAX = 1;
3408 return;
3409 }
3410 len = xmlStrlen(entity->content);
3411 if (len <= 2) goto handle_as_char;
3412
3413 /*
3414 * Redefine its content as an input stream.
3415 */
3416 input = xmlNewEntityInputStream(ctxt, entity);
3417 xmlPushInput(ctxt, input);
3418 return;
3419
3420handle_as_char:
3421 /*
3422 * Just handle the content as a set of chars.
3423 */
3424 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3425 (ctxt->sax->characters != NULL))
3426 ctxt->sax->characters(ctxt->userData, entity->content, len);
3427#endif
3428}
3429
Daniel Veillarda4964b72000-10-31 18:23:44 +00003430/**
3431 * xmlNewGlobalNs:
3432 * @doc: the document carrying the namespace
3433 * @href: the URI associated
3434 * @prefix: the prefix for the namespace
3435 *
3436 * Creation of a Namespace, the old way using PI and without scoping
3437 * DEPRECATED !!!
3438 * It now create a namespace on the root element of the document if found.
3439 * Returns NULL this functionnality had been removed
3440 */
3441xmlNsPtr
3442xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3443 static int deprecated = 0;
3444 if (!deprecated) {
3445 xmlGenericError(xmlGenericErrorContext,
3446 "xmlNewGlobalNs() deprecated function reached\n");
3447 deprecated = 1;
3448 }
3449 return(NULL);
3450#if 0
3451 xmlNodePtr root;
3452
3453 xmlNsPtr cur;
3454
3455 root = xmlDocGetRootElement(doc);
3456 if (root != NULL)
3457 return(xmlNewNs(root, href, prefix));
3458
3459 /*
3460 * if there is no root element yet, create an old Namespace type
3461 * and it will be moved to the root at save time.
3462 */
3463 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3464 if (cur == NULL) {
3465 xmlGenericError(xmlGenericErrorContext,
3466 "xmlNewGlobalNs : malloc failed\n");
3467 return(NULL);
3468 }
3469 memset(cur, 0, sizeof(xmlNs));
3470 cur->type = XML_GLOBAL_NAMESPACE;
3471
3472 if (href != NULL)
3473 cur->href = xmlStrdup(href);
3474 if (prefix != NULL)
3475 cur->prefix = xmlStrdup(prefix);
3476
3477 /*
3478 * Add it at the end to preserve parsing order ...
3479 */
3480 if (doc != NULL) {
3481 if (doc->oldNs == NULL) {
3482 doc->oldNs = cur;
3483 } else {
3484 xmlNsPtr prev = doc->oldNs;
3485
3486 while (prev->next != NULL) prev = prev->next;
3487 prev->next = cur;
3488 }
3489 }
3490
3491 return(NULL);
3492#endif
3493}
3494
3495/**
3496 * xmlUpgradeOldNs:
3497 * @doc: a document pointer
3498 *
3499 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3500 * DEPRECATED
3501 */
3502void
3503xmlUpgradeOldNs(xmlDocPtr doc) {
3504 static int deprecated = 0;
3505 if (!deprecated) {
3506 xmlGenericError(xmlGenericErrorContext,
3507 "xmlNewGlobalNs() deprecated function reached\n");
3508 deprecated = 1;
3509 }
3510#if 0
3511 xmlNsPtr cur;
3512
3513 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3514 if (doc->children == NULL) {
3515#ifdef DEBUG_TREE
3516 xmlGenericError(xmlGenericErrorContext,
3517 "xmlUpgradeOldNs: failed no root !\n");
3518#endif
3519 return;
3520 }
3521
3522 cur = doc->oldNs;
3523 while (cur->next != NULL) {
3524 cur->type = XML_LOCAL_NAMESPACE;
3525 cur = cur->next;
3526 }
3527 cur->type = XML_LOCAL_NAMESPACE;
3528 cur->next = doc->children->nsDef;
3529 doc->children->nsDef = doc->oldNs;
3530 doc->oldNs = NULL;
3531#endif
3532}
3533