/*
 * parser.c : Internal routines (and obsolete ones) needed for the
 *            XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
9
10#ifdef WIN32
11#include "win32config.h"
12#define XML_DIR_SEP '\\'
13#else
14#include "config.h"
15#define XML_DIR_SEP '/'
16#endif
17
18#include <stdio.h>
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
Daniel Veillardaaf58b92000-10-06 14:07:26 +000042#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000044#include <libxml/entities.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000045#include <libxml/xmlerror.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000046#include <libxml/encoding.h>
47#include <libxml/valid.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000048#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
Daniel Veillardb1059e22000-09-16 14:02:43 +000050
51
/************************************************************************
 *                                                                      *
 *              Version and Features handling                           *
 *                                                                      *
 ************************************************************************/
/* Version string of the library, initialised from LIBXML_VERSION_STRING. */
const char *xmlParserVersion = LIBXML_VERSION_STRING;
58
59/*
60 * xmlCheckVersion:
61 * @version: the include version number
62 *
63 * check the compiled lib version against the include one.
64 * This can warn or immediately kill the application
65 */
66void
67xmlCheckVersion(int version) {
68 int myversion = (int) LIBXML_VERSION;
69
70 if ((myversion / 10000) != (version / 10000)) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000071 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +000072 "Fatal: program compiled against libxml %d using libxml %d\n",
73 (version / 10000), (myversion / 10000));
74 exit(1);
75 }
76 if ((myversion / 100) < (version / 100)) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000077 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +000078 "Warning: program compiled against libxml %d using older %d\n",
79 (version / 100), (myversion / 100));
80 }
81}
82
83
/*
 * Names of the parser features reported by xmlGetFeaturesList().
 */
const char *xmlFeaturesList[] = {
    /* flags and plain values */
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    /* the SAX handler block and its individual callbacks */
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  on input the capacity of @result, on output the number of
 *        names actually stored (input/output)
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.
 * The stored pointers refer to the library's own table and must not be
 * deallocated.
 *
 * Returns -1 in case of error (*@len negative or >= 1000), otherwise
 * the total number of features; *@len is lowered to that total when it
 * was larger. If @len or @result is NULL nothing is copied and the
 * total is still returned.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        /* report back how many entries are really available */
        wanted = total;
        *len = total;
    }

    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
154
155/*
156 * xmlGetFeature:
157 * @ctxt: an XML/HTML parser context
158 * @name: the feature name
159 * @result: location to store the result
160 *
161 * Read the current value of one feature of this parser instance
162 *
163 * Returns -1 in case or error, 0 otherwise
164 */
165int
166xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
167 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
168 return(-1);
169
170 if (!strcmp(name, "validate")) {
171 *((int *) result) = ctxt->validate;
172 } else if (!strcmp(name, "keep blanks")) {
173 *((int *) result) = ctxt->keepBlanks;
174 } else if (!strcmp(name, "disable SAX")) {
175 *((int *) result) = ctxt->disableSAX;
176 } else if (!strcmp(name, "fetch external entities")) {
177 *((int *) result) = ctxt->validate;
178 } else if (!strcmp(name, "substitute entities")) {
179 *((int *) result) = ctxt->replaceEntities;
180 } else if (!strcmp(name, "gather line info")) {
181 *((int *) result) = ctxt->record_info;
182 } else if (!strcmp(name, "user data")) {
183 *((void **)result) = ctxt->userData;
184 } else if (!strcmp(name, "is html")) {
185 *((int *) result) = ctxt->html;
186 } else if (!strcmp(name, "is standalone")) {
187 *((int *) result) = ctxt->standalone;
188 } else if (!strcmp(name, "document")) {
189 *((xmlDocPtr *) result) = ctxt->myDoc;
190 } else if (!strcmp(name, "is well formed")) {
191 *((int *) result) = ctxt->wellFormed;
192 } else if (!strcmp(name, "is valid")) {
193 *((int *) result) = ctxt->valid;
194 } else if (!strcmp(name, "SAX block")) {
195 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
196 } else if (!strcmp(name, "SAX function internalSubset")) {
197 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
198 } else if (!strcmp(name, "SAX function isStandalone")) {
199 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
200 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
201 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
202 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
203 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
204 } else if (!strcmp(name, "SAX function resolveEntity")) {
205 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
206 } else if (!strcmp(name, "SAX function getEntity")) {
207 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
208 } else if (!strcmp(name, "SAX function entityDecl")) {
209 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
210 } else if (!strcmp(name, "SAX function notationDecl")) {
211 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
212 } else if (!strcmp(name, "SAX function attributeDecl")) {
213 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
214 } else if (!strcmp(name, "SAX function elementDecl")) {
215 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
216 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
217 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
218 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
219 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
220 } else if (!strcmp(name, "SAX function startDocument")) {
221 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
222 } else if (!strcmp(name, "SAX function endDocument")) {
223 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
224 } else if (!strcmp(name, "SAX function startElement")) {
225 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
226 } else if (!strcmp(name, "SAX function endElement")) {
227 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
228 } else if (!strcmp(name, "SAX function reference")) {
229 *((referenceSAXFunc *) result) = ctxt->sax->reference;
230 } else if (!strcmp(name, "SAX function characters")) {
231 *((charactersSAXFunc *) result) = ctxt->sax->characters;
232 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
233 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
234 } else if (!strcmp(name, "SAX function processingInstruction")) {
235 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
236 } else if (!strcmp(name, "SAX function comment")) {
237 *((commentSAXFunc *) result) = ctxt->sax->comment;
238 } else if (!strcmp(name, "SAX function warning")) {
239 *((warningSAXFunc *) result) = ctxt->sax->warning;
240 } else if (!strcmp(name, "SAX function error")) {
241 *((errorSAXFunc *) result) = ctxt->sax->error;
242 } else if (!strcmp(name, "SAX function fatalError")) {
243 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
244 } else if (!strcmp(name, "SAX function getParameterEntity")) {
245 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
246 } else if (!strcmp(name, "SAX function cdataBlock")) {
247 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
248 } else if (!strcmp(name, "SAX function externalSubset")) {
249 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
250 } else {
251 return(-1);
252 }
253 return(0);
254}
255
256/*
257 * xmlSetFeature:
258 * @ctxt: an XML/HTML parser context
259 * @name: the feature name
260 * @value: pointer to the location of the new value
261 *
262 * Change the current value of one feature of this parser instance
263 *
264 * Returns -1 in case or error, 0 otherwise
265 */
266int
267xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
268 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
269 return(-1);
270
271 if (!strcmp(name, "validate")) {
272 ctxt->validate = *((int *) value);
273 } else if (!strcmp(name, "keep blanks")) {
274 ctxt->keepBlanks = *((int *) value);
275 } else if (!strcmp(name, "disable SAX")) {
276 ctxt->disableSAX = *((int *) value);
277 } else if (!strcmp(name, "fetch external entities")) {
278 int newvalid = *((int *) value);
279 if ((!ctxt->validate) && (newvalid != 0)) {
280 if (ctxt->vctxt.warning == NULL)
281 ctxt->vctxt.warning = xmlParserValidityWarning;
282 if (ctxt->vctxt.error == NULL)
283 ctxt->vctxt.error = xmlParserValidityError;
284 /* Allocate the Node stack */
285 ctxt->vctxt.nodeTab = (xmlNodePtr *)
286 xmlMalloc(4 * sizeof(xmlNodePtr));
287 if (ctxt->vctxt.nodeTab == NULL) {
288 ctxt->vctxt.nodeMax = 0;
289 ctxt->validate = 0;
290 return(-1);
291 }
292 ctxt->vctxt.nodeNr = 0;
293 ctxt->vctxt.nodeMax = 4;
294 ctxt->vctxt.node = NULL;
295 }
296 ctxt->validate = newvalid;
297 } else if (!strcmp(name, "substitute entities")) {
298 ctxt->replaceEntities = *((int *) value);
299 } else if (!strcmp(name, "gather line info")) {
300 ctxt->record_info = *((int *) value);
301 } else if (!strcmp(name, "user data")) {
302 ctxt->userData = *((void **)value);
303 } else if (!strcmp(name, "is html")) {
304 ctxt->html = *((int *) value);
305 } else if (!strcmp(name, "is standalone")) {
306 ctxt->standalone = *((int *) value);
307 } else if (!strcmp(name, "document")) {
308 ctxt->myDoc = *((xmlDocPtr *) value);
309 } else if (!strcmp(name, "is well formed")) {
310 ctxt->wellFormed = *((int *) value);
311 } else if (!strcmp(name, "is valid")) {
312 ctxt->valid = *((int *) value);
313 } else if (!strcmp(name, "SAX block")) {
314 ctxt->sax = *((xmlSAXHandlerPtr *) value);
315 } else if (!strcmp(name, "SAX function internalSubset")) {
316 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function isStandalone")) {
318 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
320 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
322 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
323 } else if (!strcmp(name, "SAX function resolveEntity")) {
324 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function getEntity")) {
326 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
327 } else if (!strcmp(name, "SAX function entityDecl")) {
328 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function notationDecl")) {
330 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function attributeDecl")) {
332 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function elementDecl")) {
334 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
336 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
338 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function startDocument")) {
340 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function endDocument")) {
342 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function startElement")) {
344 ctxt->sax->startElement = *((startElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function endElement")) {
346 ctxt->sax->endElement = *((endElementSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function reference")) {
348 ctxt->sax->reference = *((referenceSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function characters")) {
350 ctxt->sax->characters = *((charactersSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
352 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function processingInstruction")) {
354 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function comment")) {
356 ctxt->sax->comment = *((commentSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function warning")) {
358 ctxt->sax->warning = *((warningSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function error")) {
360 ctxt->sax->error = *((errorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function fatalError")) {
362 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function getParameterEntity")) {
364 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
365 } else if (!strcmp(name, "SAX function cdataBlock")) {
366 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function externalSubset")) {
368 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
369 } else {
370 return(-1);
371 }
372 return(0);
373}
374
/************************************************************************
 *                                                                      *
 *              Some functions to avoid too large macros                *
 *                                                                      *
 ************************************************************************/
380
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character, excluding the surrogate blocks, FFFE,
 * and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space only TAB, LF and CR are allowed */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* FFFE and FFFF are excluded, then come the supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
401
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is one of those allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
416
/*
 * Direct lookup for the code points 0x00 - 0xFF: 1 if the code point is
 * a BaseChar, 0 otherwise.
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * Inclusive [first, last] ranges of the BaseChar code points at or
 * above 0x0100. The entries are disjoint and sorted in ascending order,
 * which xmlIsBaseChar() relies on for its binary search.
 */
static const struct {
    unsigned short first;
    unsigned short last;
} xmlBaseCharRanges[] = {
    { 0x0100, 0x0131 }, { 0x0134, 0x013E }, { 0x0141, 0x0148 },
    { 0x014A, 0x017E }, { 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 },
    { 0x01F4, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
    { 0x02BB, 0x02C1 }, { 0x0386, 0x0386 }, { 0x0388, 0x038A },
    { 0x038C, 0x038C }, { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE },
    { 0x03D0, 0x03D6 }, { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC },
    { 0x03DE, 0x03DE }, { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
    { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
    { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
    { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
    { 0x04F8, 0x04F9 }, { 0x0531, 0x0556 }, { 0x0559, 0x0559 },
    { 0x0561, 0x0586 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
    { 0x0621, 0x063A }, { 0x0641, 0x064A }, { 0x0671, 0x06B7 },
    { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 },
    { 0x06D5, 0x06D5 }, { 0x06E5, 0x06E6 },

    { 0x0905, 0x0939 }, { 0x093D, 0x093D }, { 0x0958, 0x0961 },
    { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
    { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
    { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
    { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
    { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
    { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
    { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
    { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
    { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0ABD },
    { 0x0AE0, 0x0AE0 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
    { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
    { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D }, { 0x0B5C, 0x0B5D },
    { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
    { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
    { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
    { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0C05, 0x0C0C },
    { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 },
    { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 }, { 0x0C85, 0x0C8C },
    { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 },
    { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
    { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
    { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 }, { 0x0E01, 0x0E2E },
    { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E45 },
    { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
    { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
    { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
    { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
    { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 }, { 0x0EBD, 0x0EBD },
    { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 }, { 0x0F49, 0x0F69 },

    { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, { 0x1100, 0x1100 },
    { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109, 0x1109 },
    { 0x110B, 0x110C }, { 0x110E, 0x1112 }, { 0x113C, 0x113C },
    { 0x113E, 0x113E }, { 0x1140, 0x1140 }, { 0x114C, 0x114C },
    { 0x114E, 0x114E }, { 0x1150, 0x1150 }, { 0x1154, 0x1155 },
    { 0x1159, 0x1159 }, { 0x115F, 0x1161 }, { 0x1163, 0x1163 },
    { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 },
    { 0x116D, 0x116E }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 },
    { 0x119E, 0x119E }, { 0x11A8, 0x11A8 }, { 0x11AB, 0x11AB },
    { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 }, { 0x11BA, 0x11BA },
    { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB }, { 0x11F0, 0x11F0 },
    { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
    { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
    { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
    { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
    { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FBE, 0x1FBE },
    { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
    { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
    { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 }, { 0x212A, 0x212B },
    { 0x212E, 0x212E }, { 0x2180, 0x2182 }, { 0x3041, 0x3094 },
    { 0x30A1, 0x30FA }, { 0x3105, 0x312C }, { 0xAC00, 0xD7A3 }
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, the others
 * by a binary search in the sorted xmlBaseCharRanges table. Negative
 * values are rejected up front so that they are never used as an array
 * index.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < (int) xmlBaseCharRanges[mid].first)
            high = mid - 1;
        else if (c > (int) xmlBaseCharRanges[mid].last)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
655
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] digit runs, sorted in ascending order */
    static const unsigned short digitRuns[][2] = {
        { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 }
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(digitRuns) / sizeof(digitRuns[0]); idx++) {
        /* sorted table: below the start of this run means in a gap */
        if (c < (int) digitRuns[idx][0])
            return(0);
        if (c <= (int) digitRuns[idx][1])
            return(1);
    }
    return(0);
}
685
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive [first, last] ranges, sorted in ascending order */
    static const unsigned short combiningRanges[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },

        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },

        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },

        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    unsigned int idx;

    for (idx = 0;
         idx < sizeof(combiningRanges) / sizeof(combiningRanges[0]);
         idx++) {
        /* sorted table: below the start of this range means in a gap */
        if (c < (int) combiningRanges[idx][0])
            return(0);
        if (c <= (int) combiningRanges[idx][1])
            return(1);
    }
    return(0);
}
798
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034: case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE], including the middle value 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
823
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * In addition to the ranges quoted above, this implementation also
 * accepts the code points [#xF900-#xFA2D].
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* nothing below 0x0100 can be ideographic: cheap early exit */
    if (c < 0x0100)
        return(0);

    if (c == 0x3007)
        return(1);
    if ((c >= 0x3021) && (c <= 0x3029))
        return(1);
    if ((c >= 0x4e00) && (c <= 0x9fa5))
        return(1);
    if ((c >= 0xf900) && (c <= 0xfa2d))
        return(1);

    return(0);
}
841
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The two alternatives are tested with the IS_BASECHAR and
 * IS_IDEOGRAPHIC macros, in that order.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
855
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
    /* the three allowed white space characters */
    case 0x20: case 0x0D: case 0x0A:
    /* the allowed punctuation set */
    case '-': case '\'': case '(': case ')':
    case '+': case ',': case '.': case '/':
    case ':': case '=': case '?': case ';':
    case '!': case '*': case '#': case '@':
    case '$': case '_': case '%':
        return(1);
    default:
        return(0);
    }
}
878
879/************************************************************************
880 * *
881 * Input handling functions for progressive parsing *
882 * *
883 ************************************************************************/
884
885/* #define DEBUG_INPUT */
886/* #define DEBUG_STACK */
887/* #define DEBUG_PUSH */
888
889
890/* we need to keep enough input to show errors in context */
891#define LINE_LEN 80
892
893#ifdef DEBUG_INPUT
894#define CHECK_BUFFER(in) check_buffer(in)
895
896void check_buffer(xmlParserInputPtr in) {
897 if (in->base != in->buf->buffer->content) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000898 xmlGenericError(xmlGenericErrorContext,
899 "xmlParserInput: base mismatch problem\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000900 }
901 if (in->cur < in->base) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000902 xmlGenericError(xmlGenericErrorContext,
903 "xmlParserInput: cur < base problem\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000904 }
905 if (in->cur > in->base + in->buf->buffer->use) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000906 xmlGenericError(xmlGenericErrorContext,
907 "xmlParserInput: cur > base + use problem\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000908 }
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000909 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
Daniel Veillardb1059e22000-09-16 14:02:43 +0000910 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
911 in->buf->buffer->use, in->buf->buffer->size);
912}
913
914#else
915#define CHECK_BUFFER(in)
916#endif
917
918
919/**
920 * xmlParserInputRead:
921 * @in: an XML parser input
922 * @len: an indicative size for the lookahead
923 *
924 * This function refresh the input for the parser. It doesn't try to
925 * preserve pointers to the input buffer, and discard already read data
926 *
927 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
928 * end of this entity
929 */
930int
931xmlParserInputRead(xmlParserInputPtr in, int len) {
932 int ret;
933 int used;
934 int index;
935
936#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000937 xmlGenericError(xmlGenericErrorContext, "Read\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000938#endif
939 if (in->buf == NULL) return(-1);
940 if (in->base == NULL) return(-1);
941 if (in->cur == NULL) return(-1);
942 if (in->buf->buffer == NULL) return(-1);
943 if (in->buf->readcallback == NULL) return(-1);
944
945 CHECK_BUFFER(in);
946
947 used = in->cur - in->buf->buffer->content;
948 ret = xmlBufferShrink(in->buf->buffer, used);
949 if (ret > 0) {
950 in->cur -= ret;
951 in->consumed += ret;
952 }
953 ret = xmlParserInputBufferRead(in->buf, len);
954 if (in->base != in->buf->buffer->content) {
955 /*
956 * the buffer has been realloced
957 */
958 index = in->cur - in->base;
959 in->base = in->buf->buffer->content;
960 in->cur = &in->buf->buffer->content[index];
961 }
962
963 CHECK_BUFFER(in);
964
965 return(ret);
966}
967
968/**
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
972 *
973 * This function increase the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keep already read data
975 *
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
977 * end of this entity
978 */
979int
980xmlParserInputGrow(xmlParserInputPtr in, int len) {
981 int ret;
982 int index;
983
984#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +0000986#endif
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
991
992 CHECK_BUFFER(in);
993
994 index = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
996
997 CHECK_BUFFER(in);
998
999 return(0);
1000 }
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1003 else
1004 return(0);
1005
1006 /*
1007 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
1008 * block, but we use it really as an integer to do some
1009 * pointer arithmetic. Insure will raise it as a bug but in
1010 * that specific case, that's not !
1011 */
1012 if (in->base != in->buf->buffer->content) {
1013 /*
1014 * the buffer has been realloced
1015 */
1016 index = in->cur - in->base;
1017 in->base = in->buf->buffer->content;
1018 in->cur = &in->buf->buffer->content[index];
1019 }
1020
1021 CHECK_BUFFER(in);
1022
1023 return(ret);
1024}
1025
1026/**
1027 * xmlParserInputShrink:
1028 * @in: an XML parser input
1029 *
1030 * This function removes used input for the parser.
1031 */
1032void
1033xmlParserInputShrink(xmlParserInputPtr in) {
1034 int used;
1035 int ret;
1036 int index;
1037
1038#ifdef DEBUG_INPUT
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001039 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00001040#endif
1041 if (in->buf == NULL) return;
1042 if (in->base == NULL) return;
1043 if (in->cur == NULL) return;
1044 if (in->buf->buffer == NULL) return;
1045
1046 CHECK_BUFFER(in);
1047
1048 used = in->cur - in->buf->buffer->content;
Daniel Veillard1baf4122000-10-15 20:38:39 +00001049 /*
1050 * Do not shrink on large buffers whose only a tiny fraction
1051 * was consumned
1052 */
1053 if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1054 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00001055 if (used > INPUT_CHUNK) {
1056 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1057 if (ret > 0) {
1058 in->cur -= ret;
1059 in->consumed += ret;
1060 }
1061 }
1062
1063 CHECK_BUFFER(in);
1064
1065 if (in->buf->buffer->use > INPUT_CHUNK) {
1066 return;
1067 }
1068 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1069 if (in->base != in->buf->buffer->content) {
1070 /*
1071 * the buffer has been realloced
1072 */
1073 index = in->cur - in->base;
1074 in->base = in->buf->buffer->content;
1075 in->cur = &in->buf->buffer->content[index];
1076 }
1077
1078 CHECK_BUFFER(in);
1079}
1080
1081/************************************************************************
1082 * *
1083 * UTF8 character input and related functions *
1084 * *
1085 ************************************************************************/
1086
1087/**
1088 * xmlNextChar:
1089 * @ctxt: the XML parser context
1090 *
1091 * Skip to the next char input char.
1092 */
1093
1094void
1095xmlNextChar(xmlParserCtxtPtr ctxt) {
1096 if (ctxt->instate == XML_PARSER_EOF)
1097 return;
1098
1099 /*
1100 * 2.11 End-of-Line Handling
1101 * the literal two-character sequence "#xD#xA" or a standalone
1102 * literal #xD, an XML processor must pass to the application
1103 * the single character #xA.
1104 */
1105 if (ctxt->token != 0) ctxt->token = 0;
1106 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1107 if ((*ctxt->input->cur == 0) &&
1108 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1109 (ctxt->instate != XML_PARSER_COMMENT)) {
1110 /*
1111 * If we are at the end of the current entity and
1112 * the context allows it, we pop consumed entities
1113 * automatically.
1114 * the auto closing should be blocked in other cases
1115 */
1116 xmlPopInput(ctxt);
1117 } else {
1118 if (*(ctxt->input->cur) == '\n') {
1119 ctxt->input->line++; ctxt->input->col = 1;
1120 } else ctxt->input->col++;
1121 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1122 /*
1123 * We are supposed to handle UTF8, check it's valid
1124 * From rfc2044: encoding of the Unicode values on UTF-8:
1125 *
1126 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1127 * 0000 0000-0000 007F 0xxxxxxx
1128 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1129 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1130 *
1131 * Check for the 0x110000 limit too
1132 */
1133 const unsigned char *cur = ctxt->input->cur;
1134 unsigned char c;
1135
1136 c = *cur;
1137 if (c & 0x80) {
1138 if (cur[1] == 0)
1139 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1140 if ((cur[1] & 0xc0) != 0x80)
1141 goto encoding_error;
1142 if ((c & 0xe0) == 0xe0) {
1143 unsigned int val;
1144
1145 if (cur[2] == 0)
1146 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147 if ((cur[2] & 0xc0) != 0x80)
1148 goto encoding_error;
1149 if ((c & 0xf0) == 0xf0) {
1150 if (cur[3] == 0)
1151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1152 if (((c & 0xf8) != 0xf0) ||
1153 ((cur[3] & 0xc0) != 0x80))
1154 goto encoding_error;
1155 /* 4-byte code */
1156 ctxt->input->cur += 4;
1157 val = (cur[0] & 0x7) << 18;
1158 val |= (cur[1] & 0x3f) << 12;
1159 val |= (cur[2] & 0x3f) << 6;
1160 val |= cur[3] & 0x3f;
1161 } else {
1162 /* 3-byte code */
1163 ctxt->input->cur += 3;
1164 val = (cur[0] & 0xf) << 12;
1165 val |= (cur[1] & 0x3f) << 6;
1166 val |= cur[2] & 0x3f;
1167 }
1168 if (((val > 0xd7ff) && (val < 0xe000)) ||
1169 ((val > 0xfffd) && (val < 0x10000)) ||
1170 (val >= 0x110000)) {
1171 if ((ctxt->sax != NULL) &&
1172 (ctxt->sax->error != NULL))
1173 ctxt->sax->error(ctxt->userData,
1174 "Char 0x%X out of allowed range\n", val);
1175 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1176 ctxt->wellFormed = 0;
1177 ctxt->disableSAX = 1;
1178 }
1179 } else
1180 /* 2-byte code */
1181 ctxt->input->cur += 2;
1182 } else
1183 /* 1-byte code */
1184 ctxt->input->cur++;
1185 } else {
1186 /*
1187 * Assume it's a fixed lenght encoding (1) with
1188 * a compatibke encoding for the ASCII set, since
1189 * XML constructs only use < 128 chars
1190 */
1191 ctxt->input->cur++;
1192 }
1193 ctxt->nbChars++;
1194 if (*ctxt->input->cur == 0)
1195 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1196 }
1197 } else {
1198 ctxt->input->cur++;
1199 ctxt->nbChars++;
1200 if (*ctxt->input->cur == 0)
1201 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1202 }
1203 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1204 xmlParserHandlePEReference(ctxt);
1205 if ((*ctxt->input->cur == 0) &&
1206 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1207 xmlPopInput(ctxt);
1208 return;
1209encoding_error:
1210 /*
1211 * If we detect an UTF8 error that probably mean that the
1212 * input encoding didn't get properly advertized in the
1213 * declaration header. Report the error and switch the encoding
1214 * to ISO-Latin-1 (if you don't like this policy, just declare the
1215 * encoding !)
1216 */
1217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1218 ctxt->sax->error(ctxt->userData,
1219 "Input is not proper UTF-8, indicate encoding !\n");
1220 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1221 ctxt->input->cur[0], ctxt->input->cur[1],
1222 ctxt->input->cur[2], ctxt->input->cur[3]);
1223 }
1224 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1225
1226 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1227 ctxt->input->cur++;
1228 return;
1229}
1230
1231/**
1232 * xmlCurrentChar:
1233 * @ctxt: the XML parser context
1234 * @len: pointer to the length of the char read
1235 *
1236 * The current char value, if using UTF-8 this may actaully span multiple
1237 * bytes in the input buffer. Implement the end of line normalization:
1238 * 2.11 End-of-Line Handling
1239 * Wherever an external parsed entity or the literal entity value
1240 * of an internal parsed entity contains either the literal two-character
1241 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1242 * must pass to the application the single character #xA.
1243 * This behavior can conveniently be produced by normalizing all
1244 * line breaks to #xA on input, before parsing.)
1245 *
1246 * Returns the current char value and its lenght
1247 */
1248
1249int
1250xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1251 if (ctxt->instate == XML_PARSER_EOF)
1252 return(0);
1253
1254 if (ctxt->token != 0) {
1255 *len = 0;
1256 return(ctxt->token);
1257 }
1258 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1259 /*
1260 * We are supposed to handle UTF8, check it's valid
1261 * From rfc2044: encoding of the Unicode values on UTF-8:
1262 *
1263 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1264 * 0000 0000-0000 007F 0xxxxxxx
1265 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1266 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1267 *
1268 * Check for the 0x110000 limit too
1269 */
1270 const unsigned char *cur = ctxt->input->cur;
1271 unsigned char c;
1272 unsigned int val;
1273
1274 c = *cur;
1275 if (c & 0x80) {
1276 if (cur[1] == 0)
1277 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1278 if ((cur[1] & 0xc0) != 0x80)
1279 goto encoding_error;
1280 if ((c & 0xe0) == 0xe0) {
1281
1282 if (cur[2] == 0)
1283 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1284 if ((cur[2] & 0xc0) != 0x80)
1285 goto encoding_error;
1286 if ((c & 0xf0) == 0xf0) {
1287 if (cur[3] == 0)
1288 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289 if (((c & 0xf8) != 0xf0) ||
1290 ((cur[3] & 0xc0) != 0x80))
1291 goto encoding_error;
1292 /* 4-byte code */
1293 *len = 4;
1294 val = (cur[0] & 0x7) << 18;
1295 val |= (cur[1] & 0x3f) << 12;
1296 val |= (cur[2] & 0x3f) << 6;
1297 val |= cur[3] & 0x3f;
1298 } else {
1299 /* 3-byte code */
1300 *len = 3;
1301 val = (cur[0] & 0xf) << 12;
1302 val |= (cur[1] & 0x3f) << 6;
1303 val |= cur[2] & 0x3f;
1304 }
1305 } else {
1306 /* 2-byte code */
1307 *len = 2;
1308 val = (cur[0] & 0x1f) << 6;
1309 val |= cur[1] & 0x3f;
1310 }
1311 if (!IS_CHAR(val)) {
1312 if ((ctxt->sax != NULL) &&
1313 (ctxt->sax->error != NULL))
1314 ctxt->sax->error(ctxt->userData,
1315 "Char 0x%X out of allowed range\n", val);
1316 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1317 ctxt->wellFormed = 0;
1318 ctxt->disableSAX = 1;
1319 }
1320 return(val);
1321 } else {
1322 /* 1-byte code */
1323 *len = 1;
1324 if (*ctxt->input->cur == 0xD) {
1325 if (ctxt->input->cur[1] == 0xA) {
1326 ctxt->nbChars++;
1327 ctxt->input->cur++;
1328 }
1329 return(0xA);
1330 }
1331 return((int) *ctxt->input->cur);
1332 }
1333 }
1334 /*
1335 * Assume it's a fixed lenght encoding (1) with
1336 * a compatibke encoding for the ASCII set, since
1337 * XML constructs only use < 128 chars
1338 */
1339 *len = 1;
1340 if (*ctxt->input->cur == 0xD) {
1341 if (ctxt->input->cur[1] == 0xA) {
1342 ctxt->nbChars++;
1343 ctxt->input->cur++;
1344 }
1345 return(0xA);
1346 }
1347 return((int) *ctxt->input->cur);
1348encoding_error:
1349 /*
1350 * If we detect an UTF8 error that probably mean that the
1351 * input encoding didn't get properly advertized in the
1352 * declaration header. Report the error and switch the encoding
1353 * to ISO-Latin-1 (if you don't like this policy, just declare the
1354 * encoding !)
1355 */
1356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1357 ctxt->sax->error(ctxt->userData,
1358 "Input is not proper UTF-8, indicate encoding !\n");
1359 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1360 ctxt->input->cur[0], ctxt->input->cur[1],
1361 ctxt->input->cur[2], ctxt->input->cur[3]);
1362 }
1363 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1364
1365 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1366 *len = 1;
1367 return((int) *ctxt->input->cur);
1368}
1369
1370/**
1371 * xmlStringCurrentChar:
1372 * @ctxt: the XML parser context
1373 * @cur: pointer to the beginning of the char
1374 * @len: pointer to the length of the char read
1375 *
1376 * The current char value, if using UTF-8 this may actaully span multiple
1377 * bytes in the input buffer.
1378 *
1379 * Returns the current char value and its lenght
1380 */
1381
1382int
1383xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1384 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1385 /*
1386 * We are supposed to handle UTF8, check it's valid
1387 * From rfc2044: encoding of the Unicode values on UTF-8:
1388 *
1389 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1390 * 0000 0000-0000 007F 0xxxxxxx
1391 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1392 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1393 *
1394 * Check for the 0x110000 limit too
1395 */
1396 unsigned char c;
1397 unsigned int val;
1398
1399 c = *cur;
1400 if (c & 0x80) {
1401 if ((cur[1] & 0xc0) != 0x80)
1402 goto encoding_error;
1403 if ((c & 0xe0) == 0xe0) {
1404
1405 if ((cur[2] & 0xc0) != 0x80)
1406 goto encoding_error;
1407 if ((c & 0xf0) == 0xf0) {
1408 if (((c & 0xf8) != 0xf0) ||
1409 ((cur[3] & 0xc0) != 0x80))
1410 goto encoding_error;
1411 /* 4-byte code */
1412 *len = 4;
1413 val = (cur[0] & 0x7) << 18;
1414 val |= (cur[1] & 0x3f) << 12;
1415 val |= (cur[2] & 0x3f) << 6;
1416 val |= cur[3] & 0x3f;
1417 } else {
1418 /* 3-byte code */
1419 *len = 3;
1420 val = (cur[0] & 0xf) << 12;
1421 val |= (cur[1] & 0x3f) << 6;
1422 val |= cur[2] & 0x3f;
1423 }
1424 } else {
1425 /* 2-byte code */
1426 *len = 2;
1427 val = (cur[0] & 0x1f) << 6;
1428 val |= cur[2] & 0x3f;
1429 }
1430 if (!IS_CHAR(val)) {
1431 if ((ctxt->sax != NULL) &&
1432 (ctxt->sax->error != NULL))
1433 ctxt->sax->error(ctxt->userData,
1434 "Char 0x%X out of allowed range\n", val);
1435 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1436 ctxt->wellFormed = 0;
1437 ctxt->disableSAX = 1;
1438 }
1439 return(val);
1440 } else {
1441 /* 1-byte code */
1442 *len = 1;
1443 return((int) *cur);
1444 }
1445 }
1446 /*
1447 * Assume it's a fixed lenght encoding (1) with
1448 * a compatibke encoding for the ASCII set, since
1449 * XML constructs only use < 128 chars
1450 */
1451 *len = 1;
1452 return((int) *cur);
1453encoding_error:
1454 /*
1455 * If we detect an UTF8 error that probably mean that the
1456 * input encoding didn't get properly advertized in the
1457 * declaration header. Report the error and switch the encoding
1458 * to ISO-Latin-1 (if you don't like this policy, just declare the
1459 * encoding !)
1460 */
1461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1462 ctxt->sax->error(ctxt->userData,
1463 "Input is not proper UTF-8, indicate encoding !\n");
1464 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1465 ctxt->input->cur[0], ctxt->input->cur[1],
1466 ctxt->input->cur[2], ctxt->input->cur[3]);
1467 }
1468 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1469
1470 *len = 1;
1471 return((int) *cur);
1472}
1473
1474/**
1475 * xmlCopyChar:
1476 * @len: pointer to the length of the char read (or zero)
1477 * @array: pointer to an arry of xmlChar
1478 * @val: the char value
1479 *
1480 * append the char value in the array
1481 *
1482 * Returns the number of xmlChar written
1483 */
1484
1485int
1486xmlCopyChar(int len, xmlChar *out, int val) {
1487 /*
1488 * We are supposed to handle UTF8, check it's valid
1489 * From rfc2044: encoding of the Unicode values on UTF-8:
1490 *
1491 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1492 * 0000 0000-0000 007F 0xxxxxxx
1493 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1494 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1495 */
1496 if (len == 0) {
1497 if (val < 0) len = 0;
1498 else if (val < 0x80) len = 1;
1499 else if (val < 0x800) len = 2;
1500 else if (val < 0x10000) len = 3;
1501 else if (val < 0x110000) len = 4;
1502 if (len == 0) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001503 xmlGenericError(xmlGenericErrorContext,
1504 "Internal error, xmlCopyChar 0x%X out of bound\n",
Daniel Veillardb1059e22000-09-16 14:02:43 +00001505 val);
1506 return(0);
1507 }
1508 }
1509 if (len > 1) {
1510 int bits;
1511
1512 if (val < 0x80) { *out++= val; bits= -6; }
1513 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1514 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1515 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1516
1517 for ( ; bits >= 0; bits-= 6)
1518 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1519
1520 return(len);
1521 }
1522 *out = (xmlChar) val;
1523 return(1);
1524}
1525
1526/************************************************************************
1527 * *
Daniel Veillard04698d92000-09-17 16:00:22 +00001528 * Commodity functions to switch encodings *
1529 * *
1530 ************************************************************************/
1531
1532/**
1533 * xmlSwitchEncoding:
1534 * @ctxt: the parser context
1535 * @enc: the encoding value (number)
1536 *
1537 * change the input functions when discovering the character encoding
1538 * of a given entity.
1539 *
1540 * Returns 0 in case of success, -1 otherwise
1541 */
1542int
1543xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1544{
1545 xmlCharEncodingHandlerPtr handler;
1546
1547 switch (enc) {
1548 case XML_CHAR_ENCODING_ERROR:
1549 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1551 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1552 ctxt->wellFormed = 0;
1553 ctxt->disableSAX = 1;
1554 break;
1555 case XML_CHAR_ENCODING_NONE:
1556 /* let's assume it's UTF-8 without the XML decl */
1557 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1558 return(0);
1559 case XML_CHAR_ENCODING_UTF8:
1560 /* default encoding, no conversion should be needed */
1561 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1562 return(0);
1563 default:
1564 break;
1565 }
1566 handler = xmlGetCharEncodingHandler(enc);
1567 if (handler == NULL) {
1568 /*
1569 * Default handlers.
1570 */
1571 switch (enc) {
1572 case XML_CHAR_ENCODING_ERROR:
1573 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1576 ctxt->wellFormed = 0;
1577 ctxt->disableSAX = 1;
1578 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1579 break;
1580 case XML_CHAR_ENCODING_NONE:
1581 /* let's assume it's UTF-8 without the XML decl */
1582 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1583 return(0);
1584 case XML_CHAR_ENCODING_UTF8:
1585 case XML_CHAR_ENCODING_ASCII:
1586 /* default encoding, no conversion should be needed */
1587 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588 return(0);
1589 case XML_CHAR_ENCODING_UTF16LE:
1590 break;
1591 case XML_CHAR_ENCODING_UTF16BE:
1592 break;
1593 case XML_CHAR_ENCODING_UCS4LE:
1594 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1596 ctxt->sax->error(ctxt->userData,
1597 "char encoding USC4 little endian not supported\n");
1598 break;
1599 case XML_CHAR_ENCODING_UCS4BE:
1600 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602 ctxt->sax->error(ctxt->userData,
1603 "char encoding USC4 big endian not supported\n");
1604 break;
1605 case XML_CHAR_ENCODING_EBCDIC:
1606 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData,
1609 "char encoding EBCDIC not supported\n");
1610 break;
1611 case XML_CHAR_ENCODING_UCS4_2143:
1612 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1614 ctxt->sax->error(ctxt->userData,
1615 "char encoding UCS4 2143 not supported\n");
1616 break;
1617 case XML_CHAR_ENCODING_UCS4_3412:
1618 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1620 ctxt->sax->error(ctxt->userData,
1621 "char encoding UCS4 3412 not supported\n");
1622 break;
1623 case XML_CHAR_ENCODING_UCS2:
1624 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626 ctxt->sax->error(ctxt->userData,
1627 "char encoding UCS2 not supported\n");
1628 break;
1629 case XML_CHAR_ENCODING_8859_1:
1630 case XML_CHAR_ENCODING_8859_2:
1631 case XML_CHAR_ENCODING_8859_3:
1632 case XML_CHAR_ENCODING_8859_4:
1633 case XML_CHAR_ENCODING_8859_5:
1634 case XML_CHAR_ENCODING_8859_6:
1635 case XML_CHAR_ENCODING_8859_7:
1636 case XML_CHAR_ENCODING_8859_8:
1637 case XML_CHAR_ENCODING_8859_9:
1638 /*
1639 * We used to keep the internal content in the
1640 * document encoding however this turns being unmaintainable
1641 * So xmlGetCharEncodingHandler() will return non-null
1642 * values for this now.
1643 */
1644 if ((ctxt->inputNr == 1) &&
1645 (ctxt->encoding == NULL) &&
1646 (ctxt->input->encoding != NULL)) {
1647 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1648 }
1649 ctxt->charset = enc;
1650 return(0);
1651 case XML_CHAR_ENCODING_2022_JP:
1652 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654 ctxt->sax->error(ctxt->userData,
1655 "char encoding ISO-2022-JPnot supported\n");
1656 break;
1657 case XML_CHAR_ENCODING_SHIFT_JIS:
1658 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "char encoding Shift_JIS not supported\n");
1662 break;
1663 case XML_CHAR_ENCODING_EUC_JP:
1664 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666 ctxt->sax->error(ctxt->userData,
1667 "char encoding EUC-JPnot supported\n");
1668 break;
1669 }
1670 }
1671 if (handler == NULL)
1672 return(-1);
1673 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1674 return(xmlSwitchToEncoding(ctxt, handler));
1675}
1676
/**
 * xmlSwitchToEncoding:
 * @ctxt:  the parser context
 * @handler:  the encoding handler
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * The handler is installed as the encoder of the current input buffer.
 * If an encoder is already installed it is closed and replaced, and the
 * content already present in the buffer is left as is. Otherwise the
 * content not yet consumed becomes the raw buffer and is converted
 * (entirely for HTML, only the first line otherwise) into a new parser
 * reading buffer.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
    int nbchars;

    if (handler != NULL) {
        if (ctxt->input != NULL) {
            if (ctxt->input->buf != NULL) {
                if (ctxt->input->buf->encoder != NULL) {
                    /* same handler already in place, nothing to do */
                    if (ctxt->input->buf->encoder == handler)
                        return(0);
                    /*
                     * Note: this is a bit dangerous, but that's what it
                     * takes to use nearly compatible signature for different
                     * encodings.
                     */
                    xmlCharEncCloseFunc(ctxt->input->buf->encoder);
                    ctxt->input->buf->encoder = handler;
                    return(0);
                }
                ctxt->input->buf->encoder = handler;

                /*
                 * Is there already some content down the pipe to convert ?
                 */
                if ((ctxt->input->buf->buffer != NULL) &&
                    (ctxt->input->buf->buffer->use > 0)) {
                    int processed;

                    /*
                     * Specific handling of the Byte Order Mark for
                     * UTF-16: it is skipped so that it does not get
                     * converted with the content
                     */
                    if ((handler->name != NULL) &&
                        (!strcmp(handler->name, "UTF-16LE")) &&
                        (ctxt->input->cur[0] == 0xFF) &&
                        (ctxt->input->cur[1] == 0xFE)) {
                        ctxt->input->cur += 2;
                    }
                    if ((handler->name != NULL) &&
                        (!strcmp(handler->name, "UTF-16BE")) &&
                        (ctxt->input->cur[0] == 0xFE) &&
                        (ctxt->input->cur[1] == 0xFF)) {
                        ctxt->input->cur += 2;
                    }

                    /*
                     * Shrink the current input buffer.
                     * Move it as the raw buffer and create a new input buffer
                     *
                     * NOTE(review): the result of xmlBufferCreate() is
                     * not checked before being used below.
                     */
                    processed = ctxt->input->cur - ctxt->input->base;
                    xmlBufferShrink(ctxt->input->buf->buffer, processed);
                    ctxt->input->buf->raw = ctxt->input->buf->buffer;
                    ctxt->input->buf->buffer = xmlBufferCreate();

                    if (ctxt->html) {
                        /*
                         * convert as much as possible of the buffer
                         */
                        nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
                                                   ctxt->input->buf->buffer,
                                                   ctxt->input->buf->raw);
                    } else {
                        /*
                         * convert just enough to get
                         * '<?xml version="1.0" encoding="xxx"?>'
                         * parsed with the autodetected encoding
                         * into the parser reading buffer.
                         */
                        nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
                                                      ctxt->input->buf->buffer,
                                                      ctxt->input->buf->raw);
                    }
                    if (nbchars < 0) {
                        xmlGenericError(xmlGenericErrorContext,
                                "xmlSwitchToEncoding: encoder error\n");
                        return(-1);
                    }
                    /* restart parsing from the beginning of the new buffer */
                    ctxt->input->base =
                    ctxt->input->cur = ctxt->input->buf->buffer->content;

                }
                return(0);
            } else {
                /*
                 * NOTE(review): this branch is only entered when
                 * ctxt->input->buf is NULL, so the test below is always
                 * true and the else part, which dereferences
                 * ctxt->input->buf, looks unreachable - confirm.
                 */
                if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
                    /*
                     * When parsing a static memory array one must know the
                     * size to be able to convert the buffer.
                     */
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                         "xmlSwitchEncoding : no input\n");
                    return(-1);
                } else {
                    int processed;

                    /*
                     * Shrink the current input buffer.
                     * Move it as the raw buffer and create a new input buffer
                     */
                    processed = ctxt->input->cur - ctxt->input->base;

                    ctxt->input->buf->raw = xmlBufferCreate();
                    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
                                 ctxt->input->length - processed);
                    ctxt->input->buf->buffer = xmlBufferCreate();

                    /*
                     * convert as much as possible of the raw input
                     * to the parser reading buffer.
                     */
                    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
                                               ctxt->input->buf->buffer,
                                               ctxt->input->buf->raw);
                    if (nbchars < 0) {
                        xmlGenericError(xmlGenericErrorContext,
                                "xmlSwitchToEncoding: encoder error\n");
                        return(-1);
                    }

                    /*
                     * Conversion succeeded, get rid of the old buffer
                     */
                    if ((ctxt->input->free != NULL) &&
                        (ctxt->input->base != NULL))
                        ctxt->input->free((xmlChar *) ctxt->input->base);
                    ctxt->input->base =
                    ctxt->input->cur = ctxt->input->buf->buffer->content;
                }
            }
        } else {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "xmlSwitchEncoding : no input\n");
            return(-1);
        }
        /*
         * The parsing is now done in UTF8 natively
         */
        ctxt->charset = XML_CHAR_ENCODING_UTF8;
    } else
        return(-1);
    return(0);

}
1832
1833/************************************************************************
1834 * *
Daniel Veillardb1059e22000-09-16 14:02:43 +00001835 * Commodity functions to handle entities processing *
1836 * *
1837 ************************************************************************/
1838
1839/**
1840 * xmlFreeInputStream:
1841 * @input: an xmlParserInputPtr
1842 *
1843 * Free up an input stream.
1844 */
1845void
1846xmlFreeInputStream(xmlParserInputPtr input) {
1847 if (input == NULL) return;
1848
1849 if (input->filename != NULL) xmlFree((char *) input->filename);
1850 if (input->directory != NULL) xmlFree((char *) input->directory);
1851 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1852 if (input->version != NULL) xmlFree((char *) input->version);
1853 if ((input->free != NULL) && (input->base != NULL))
1854 input->free((xmlChar *) input->base);
1855 if (input->buf != NULL)
1856 xmlFreeParserInputBuffer(input->buf);
1857 memset(input, -1, sizeof(xmlParserInput));
1858 xmlFree(input);
1859}
1860
1861/**
1862 * xmlNewInputStream:
1863 * @ctxt: an XML parser context
1864 *
1865 * Create a new input stream structure
1866 * Returns the new input stream or NULL
1867 */
1868xmlParserInputPtr
1869xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1870 xmlParserInputPtr input;
1871
1872 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1873 if (input == NULL) {
1874 if (ctxt != NULL) {
1875 ctxt->errNo = XML_ERR_NO_MEMORY;
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "malloc: couldn't allocate a new input stream\n");
1879 ctxt->errNo = XML_ERR_NO_MEMORY;
1880 }
1881 return(NULL);
1882 }
1883 memset(input, 0, sizeof(xmlParserInput));
1884 input->line = 1;
1885 input->col = 1;
1886 input->standalone = -1;
1887 return(input);
1888}
1889
1890/**
1891 * xmlNewIOInputStream:
1892 * @ctxt: an XML parser context
1893 * @input: an I/O Input
1894 * @enc: the charset encoding if known
1895 *
1896 * Create a new input stream structure encapsulating the @input into
1897 * a stream suitable for the parser.
1898 *
1899 * Returns the new input stream or NULL
1900 */
1901xmlParserInputPtr
1902xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1903 xmlCharEncoding enc) {
1904 xmlParserInputPtr inputStream;
1905
1906 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001907 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00001908 inputStream = xmlNewInputStream(ctxt);
1909 if (inputStream == NULL) {
1910 return(NULL);
1911 }
1912 inputStream->filename = NULL;
1913 inputStream->buf = input;
1914 inputStream->base = inputStream->buf->buffer->content;
1915 inputStream->cur = inputStream->buf->buffer->content;
1916 if (enc != XML_CHAR_ENCODING_NONE) {
1917 xmlSwitchEncoding(ctxt, enc);
1918 }
1919
1920 return(inputStream);
1921}
1922
1923/**
1924 * xmlNewEntityInputStream:
1925 * @ctxt: an XML parser context
1926 * @entity: an Entity pointer
1927 *
1928 * Create a new input stream based on an xmlEntityPtr
1929 *
1930 * Returns the new input stream or NULL
1931 */
1932xmlParserInputPtr
1933xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1934 xmlParserInputPtr input;
1935
1936 if (entity == NULL) {
1937 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1939 ctxt->sax->error(ctxt->userData,
1940 "internal: xmlNewEntityInputStream entity = NULL\n");
1941 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1942 return(NULL);
1943 }
1944 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001945 xmlGenericError(xmlGenericErrorContext,
1946 "new input from entity: %s\n", entity->name);
Daniel Veillardb1059e22000-09-16 14:02:43 +00001947 if (entity->content == NULL) {
1948 switch (entity->etype) {
1949 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1950 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1952 ctxt->sax->error(ctxt->userData,
1953 "xmlNewEntityInputStream unparsed entity !\n");
1954 break;
1955 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1956 case XML_EXTERNAL_PARAMETER_ENTITY:
1957 return(xmlLoadExternalEntity((char *) entity->URI,
1958 (char *) entity->ExternalID, ctxt));
1959 case XML_INTERNAL_GENERAL_ENTITY:
1960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1961 ctxt->sax->error(ctxt->userData,
1962 "Internal entity %s without content !\n", entity->name);
1963 break;
1964 case XML_INTERNAL_PARAMETER_ENTITY:
1965 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
1968 "Internal parameter entity %s without content !\n", entity->name);
1969 break;
1970 case XML_INTERNAL_PREDEFINED_ENTITY:
1971 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1973 ctxt->sax->error(ctxt->userData,
1974 "Predefined entity %s without content !\n", entity->name);
1975 break;
1976 }
1977 return(NULL);
1978 }
1979 input = xmlNewInputStream(ctxt);
1980 if (input == NULL) {
1981 return(NULL);
1982 }
1983 input->filename = (char *) entity->URI;
1984 input->base = entity->content;
1985 input->cur = entity->content;
1986 input->length = entity->length;
1987 return(input);
1988}
1989
1990/**
1991 * xmlNewStringInputStream:
1992 * @ctxt: an XML parser context
1993 * @buffer: an memory buffer
1994 *
1995 * Create a new input stream based on a memory buffer.
1996 * Returns the new input stream
1997 */
1998xmlParserInputPtr
1999xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2000 xmlParserInputPtr input;
2001
2002 if (buffer == NULL) {
2003 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "internal: xmlNewStringInputStream string = NULL\n");
2007 return(NULL);
2008 }
2009 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002010 xmlGenericError(xmlGenericErrorContext,
2011 "new fixed input: %.30s\n", buffer);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002012 input = xmlNewInputStream(ctxt);
2013 if (input == NULL) {
2014 return(NULL);
2015 }
2016 input->base = buffer;
2017 input->cur = buffer;
2018 input->length = xmlStrlen(buffer);
2019 return(input);
2020}
2021
2022/**
2023 * xmlNewInputFromFile:
2024 * @ctxt: an XML parser context
2025 * @filename: the filename to use as entity
2026 *
2027 * Create a new input stream based on a file.
2028 *
2029 * Returns the new input stream or NULL in case of error
2030 */
2031xmlParserInputPtr
2032xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2033 xmlParserInputBufferPtr buf;
2034 xmlParserInputPtr inputStream;
2035 char *directory = NULL;
2036 xmlChar *URI = NULL;
2037
2038 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002039 xmlGenericError(xmlGenericErrorContext,
2040 "new input from file: %s\n", filename);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002041 if (ctxt == NULL) return(NULL);
2042 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2043 if (buf == NULL)
2044 return(NULL);
2045
2046 URI = xmlStrdup((xmlChar *) filename);
Daniel Veillard04698d92000-09-17 16:00:22 +00002047 directory = xmlParserGetDirectory((const char *) URI);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002048
2049 inputStream = xmlNewInputStream(ctxt);
2050 if (inputStream == NULL) {
2051 if (directory != NULL) xmlFree((char *) directory);
2052 if (URI != NULL) xmlFree((char *) URI);
2053 return(NULL);
2054 }
2055
Daniel Veillard04698d92000-09-17 16:00:22 +00002056 inputStream->filename = (const char *) URI;
Daniel Veillardb1059e22000-09-16 14:02:43 +00002057 inputStream->directory = directory;
2058 inputStream->buf = buf;
2059
2060 inputStream->base = inputStream->buf->buffer->content;
2061 inputStream->cur = inputStream->buf->buffer->content;
2062 if ((ctxt->directory == NULL) && (directory != NULL))
2063 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2064 return(inputStream);
2065}
2066
2067/************************************************************************
2068 * *
2069 * Commodity functions to handle parser contexts *
2070 * *
2071 ************************************************************************/
2072
2073/**
2074 * xmlInitParserCtxt:
2075 * @ctxt: an XML parser context
2076 *
2077 * Initialize a parser context
2078 */
2079
2080void
2081xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2082{
2083 xmlSAXHandler *sax;
2084
2085 xmlDefaultSAXHandlerInit();
2086
2087 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2088 if (sax == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002089 xmlGenericError(xmlGenericErrorContext,
2090 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002091 }
Daniel Veillard4fb87ee2000-09-19 12:25:59 +00002092 else
2093 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002094
2095 /* Allocate the Input stack */
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002096 ctxt->inputTab = (xmlParserInputPtr *)
2097 xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb1059e22000-09-16 14:02:43 +00002098 if (ctxt->inputTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002099 xmlGenericError(xmlGenericErrorContext,
2100 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002101 ctxt->inputNr = 0;
2102 ctxt->inputMax = 0;
2103 ctxt->input = NULL;
2104 return;
2105 }
2106 ctxt->inputNr = 0;
2107 ctxt->inputMax = 5;
2108 ctxt->input = NULL;
2109
2110 ctxt->version = NULL;
2111 ctxt->encoding = NULL;
2112 ctxt->standalone = -1;
2113 ctxt->hasExternalSubset = 0;
2114 ctxt->hasPErefs = 0;
2115 ctxt->html = 0;
2116 ctxt->external = 0;
2117 ctxt->instate = XML_PARSER_START;
2118 ctxt->token = 0;
2119 ctxt->directory = NULL;
2120
2121 /* Allocate the Node stack */
2122 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2123 if (ctxt->nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002124 xmlGenericError(xmlGenericErrorContext,
2125 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002126 ctxt->nodeNr = 0;
2127 ctxt->nodeMax = 0;
2128 ctxt->node = NULL;
2129 ctxt->inputNr = 0;
2130 ctxt->inputMax = 0;
2131 ctxt->input = NULL;
2132 return;
2133 }
2134 ctxt->nodeNr = 0;
2135 ctxt->nodeMax = 10;
2136 ctxt->node = NULL;
2137
2138 /* Allocate the Name stack */
2139 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2140 if (ctxt->nameTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002141 xmlGenericError(xmlGenericErrorContext,
2142 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002143 ctxt->nodeNr = 0;
2144 ctxt->nodeMax = 0;
2145 ctxt->node = NULL;
2146 ctxt->inputNr = 0;
2147 ctxt->inputMax = 0;
2148 ctxt->input = NULL;
2149 ctxt->nameNr = 0;
2150 ctxt->nameMax = 0;
2151 ctxt->name = NULL;
2152 return;
2153 }
2154 ctxt->nameNr = 0;
2155 ctxt->nameMax = 10;
2156 ctxt->name = NULL;
2157
2158 /* Allocate the space stack */
2159 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2160 if (ctxt->spaceTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002161 xmlGenericError(xmlGenericErrorContext,
2162 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002163 ctxt->nodeNr = 0;
2164 ctxt->nodeMax = 0;
2165 ctxt->node = NULL;
2166 ctxt->inputNr = 0;
2167 ctxt->inputMax = 0;
2168 ctxt->input = NULL;
2169 ctxt->nameNr = 0;
2170 ctxt->nameMax = 0;
2171 ctxt->name = NULL;
2172 ctxt->spaceNr = 0;
2173 ctxt->spaceMax = 0;
2174 ctxt->space = NULL;
2175 return;
2176 }
2177 ctxt->spaceNr = 1;
2178 ctxt->spaceMax = 10;
2179 ctxt->spaceTab[0] = -1;
2180 ctxt->space = &ctxt->spaceTab[0];
2181
2182 if (sax == NULL) {
2183 ctxt->sax = &xmlDefaultSAXHandler;
2184 } else {
2185 ctxt->sax = sax;
2186 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2187 }
2188 ctxt->userData = ctxt;
2189 ctxt->myDoc = NULL;
2190 ctxt->wellFormed = 1;
2191 ctxt->valid = 1;
2192 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2193 ctxt->pedantic = xmlPedanticParserDefaultValue;
2194 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2195 ctxt->vctxt.userData = ctxt;
2196 if (ctxt->validate) {
2197 ctxt->vctxt.error = xmlParserValidityError;
2198 if (xmlGetWarningsDefaultValue == 0)
2199 ctxt->vctxt.warning = NULL;
2200 else
2201 ctxt->vctxt.warning = xmlParserValidityWarning;
2202 /* Allocate the Node stack */
2203 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2204 if (ctxt->vctxt.nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002205 xmlGenericError(xmlGenericErrorContext,
2206 "xmlInitParserCtxt: out of memory\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002207 ctxt->vctxt.nodeMax = 0;
2208 ctxt->validate = 0;
2209 ctxt->vctxt.error = NULL;
2210 ctxt->vctxt.warning = NULL;
2211 } else {
2212 ctxt->vctxt.nodeNr = 0;
2213 ctxt->vctxt.nodeMax = 4;
2214 ctxt->vctxt.node = NULL;
2215 }
2216 } else {
2217 ctxt->vctxt.error = NULL;
2218 ctxt->vctxt.warning = NULL;
2219 }
2220 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2221 ctxt->record_info = 0;
2222 ctxt->nbChars = 0;
2223 ctxt->checkIndex = 0;
2224 ctxt->inSubset = 0;
2225 ctxt->errNo = XML_ERR_OK;
2226 ctxt->depth = 0;
2227 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2228 xmlInitNodeInfoSeq(&ctxt->node_seq);
2229}
2230
2231/**
2232 * xmlFreeParserCtxt:
2233 * @ctxt: an XML parser context
2234 *
2235 * Free all the memory used by a parser context. However the parsed
2236 * document in ctxt->myDoc is not freed.
2237 */
2238
2239void
2240xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2241{
2242 xmlParserInputPtr input;
2243 xmlChar *oldname;
2244
2245 if (ctxt == NULL) return;
2246
2247 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2248 xmlFreeInputStream(input);
2249 }
2250 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2251 xmlFree(oldname);
2252 }
2253 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2254 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2255 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2256 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2257 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2258 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2259 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2260 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2261 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2262 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2263 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2264 xmlFree(ctxt->sax);
2265 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2266 xmlFree(ctxt);
2267}
2268
2269/**
2270 * xmlNewParserCtxt:
2271 *
2272 * Allocate and initialize a new parser context.
2273 *
2274 * Returns the xmlParserCtxtPtr or NULL
2275 */
2276
2277xmlParserCtxtPtr
2278xmlNewParserCtxt()
2279{
2280 xmlParserCtxtPtr ctxt;
2281
2282 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2283 if (ctxt == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002284 xmlGenericError(xmlGenericErrorContext,
2285 "xmlNewParserCtxt : cannot allocate context\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002286 perror("malloc");
2287 return(NULL);
2288 }
2289 memset(ctxt, 0, sizeof(xmlParserCtxt));
2290 xmlInitParserCtxt(ctxt);
2291 return(ctxt);
2292}
2293
2294/************************************************************************
2295 * *
 *		Handling of node information				*
2297 * *
2298 ************************************************************************/
2299
2300/**
2301 * xmlClearParserCtxt:
2302 * @ctxt: an XML parser context
2303 *
2304 * Clear (release owned resources) and reinitialize a parser context
2305 */
2306
2307void
2308xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2309{
2310 xmlClearNodeInfoSeq(&ctxt->node_seq);
2311 xmlInitParserCtxt(ctxt);
2312}
2313
2314/**
2315 * xmlParserFindNodeInfo:
2316 * @ctxt: an XML parser context
2317 * @node: an XML node within the tree
2318 *
2319 * Find the parser node info struct for a given node
2320 *
2321 * Returns an xmlParserNodeInfo block pointer or NULL
2322 */
2323const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2324 const xmlNode* node)
2325{
2326 unsigned long pos;
2327
2328 /* Find position where node should be at */
2329 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2330 if ( ctx->node_seq.buffer[pos].node == node )
2331 return &ctx->node_seq.buffer[pos];
2332 else
2333 return NULL;
2334}
2335
2336
2337/**
2338 * xmlInitNodeInfoSeq:
2339 * @seq: a node info sequence pointer
2340 *
2341 * -- Initialize (set to initial state) node info sequence
2342 */
2343void
2344xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2345{
2346 seq->length = 0;
2347 seq->maximum = 0;
2348 seq->buffer = NULL;
2349}
2350
2351/**
2352 * xmlClearNodeInfoSeq:
2353 * @seq: a node info sequence pointer
2354 *
2355 * -- Clear (release memory and reinitialize) node
2356 * info sequence
2357 */
2358void
2359xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2360{
2361 if ( seq->buffer != NULL )
2362 xmlFree(seq->buffer);
2363 xmlInitNodeInfoSeq(seq);
2364}
2365
2366
2367/**
2368 * xmlParserFindNodeInfoIndex:
2369 * @seq: a node info sequence pointer
2370 * @node: an XML node pointer
2371 *
2372 *
2373 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2374 * the given node is or should be at in a sorted sequence
2375 *
2376 * Returns a long indicating the position of the record
2377 */
2378unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2379 const xmlNode* node)
2380{
2381 unsigned long upper, lower, middle;
2382 int found = 0;
2383
2384 /* Do a binary search for the key */
2385 lower = 1;
2386 upper = seq->length;
2387 middle = 0;
2388 while ( lower <= upper && !found) {
2389 middle = lower + (upper - lower) / 2;
2390 if ( node == seq->buffer[middle - 1].node )
2391 found = 1;
2392 else if ( node < seq->buffer[middle - 1].node )
2393 upper = middle - 1;
2394 else
2395 lower = middle + 1;
2396 }
2397
2398 /* Return position */
2399 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2400 return middle;
2401 else
2402 return middle - 1;
2403}
2404
2405
2406/**
2407 * xmlParserAddNodeInfo:
2408 * @ctxt: an XML parser context
2409 * @info: a node info sequence pointer
2410 *
2411 * Insert node info record into the sorted sequence
2412 */
2413void
2414xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2415 const xmlParserNodeInfo* info)
2416{
2417 unsigned long pos;
2418 static unsigned int block_size = 5;
2419
2420 /* Find pos and check to see if node is already in the sequence */
2421 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2422 if ( pos < ctxt->node_seq.length
2423 && ctxt->node_seq.buffer[pos].node == info->node ) {
2424 ctxt->node_seq.buffer[pos] = *info;
2425 }
2426
2427 /* Otherwise, we need to add new node to buffer */
2428 else {
2429 /* Expand buffer by 5 if needed */
2430 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2431 xmlParserNodeInfo* tmp_buffer;
2432 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2433 *(ctxt->node_seq.maximum + block_size));
2434
2435 if ( ctxt->node_seq.buffer == NULL )
2436 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2437 else
2438 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2439
2440 if ( tmp_buffer == NULL ) {
2441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2443 ctxt->errNo = XML_ERR_NO_MEMORY;
2444 return;
2445 }
2446 ctxt->node_seq.buffer = tmp_buffer;
2447 ctxt->node_seq.maximum += block_size;
2448 }
2449
2450 /* If position is not at end, move elements out of the way */
2451 if ( pos != ctxt->node_seq.length ) {
2452 unsigned long i;
2453
2454 for ( i = ctxt->node_seq.length; i > pos; i-- )
2455 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2456 }
2457
2458 /* Copy element and increase length */
2459 ctxt->node_seq.buffer[pos] = *info;
2460 ctxt->node_seq.length++;
2461 }
2462}
2463
2464/************************************************************************
2465 * *
2466 * Deprecated functions kept for compatibility *
2467 * *
2468 ************************************************************************/
2469
2470/*
2471 * xmlCheckLanguageID
2472 * @lang: pointer to the string value
2473 *
2474 * Checks that the value conforms to the LanguageID production:
2475 *
2476 * NOTE: this is somewhat deprecated, those productions were removed from
2477 * the XML Second edition.
2478 *
2479 * [33] LanguageID ::= Langcode ('-' Subcode)*
2480 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2481 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2482 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2483 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2484 * [38] Subcode ::= ([a-z] | [A-Z])+
2485 *
2486 * Returns 1 if correct 0 otherwise
2487 **/
2488int
2489xmlCheckLanguageID(const xmlChar *lang) {
2490 const xmlChar *cur = lang;
2491
2492 if (cur == NULL)
2493 return(0);
2494 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2495 ((cur[0] == 'I') && (cur[1] == '-'))) {
2496 /*
2497 * IANA code
2498 */
2499 cur += 2;
2500 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2501 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2502 cur++;
2503 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2504 ((cur[0] == 'X') && (cur[1] == '-'))) {
2505 /*
2506 * User code
2507 */
2508 cur += 2;
2509 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2510 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2511 cur++;
2512 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2513 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2514 /*
2515 * ISO639
2516 */
2517 cur++;
2518 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2519 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2520 cur++;
2521 else
2522 return(0);
2523 } else
2524 return(0);
2525 while (cur[0] != 0) { /* non input consuming */
2526 if (cur[0] != '-')
2527 return(0);
2528 cur++;
2529 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2530 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2531 cur++;
2532 else
2533 return(0);
2534 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2535 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2536 cur++;
2537 }
2538 return(1);
2539}
2540
2541/**
2542 * xmlDecodeEntities:
2543 * @ctxt: the parser context
2544 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2545 * @len: the len to decode (in bytes !), -1 for no size limit
2546 * @end: an end marker xmlChar, 0 if none
2547 * @end2: an end marker xmlChar, 0 if none
2548 * @end3: an end marker xmlChar, 0 if none
2549 *
2550 * This function is deprecated, we now always process entities content
2551 * through xmlStringDecodeEntities
2552 *
2553 * TODO: remove it in next major release.
2554 *
2555 * [67] Reference ::= EntityRef | CharRef
2556 *
2557 * [69] PEReference ::= '%' Name ';'
2558 *
2559 * Returns A newly allocated string with the substitution done. The caller
2560 * must deallocate it !
2561 */
2562xmlChar *
2563xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2564 xmlChar end, xmlChar end2, xmlChar end3) {
2565#if 0
2566 xmlChar *buffer = NULL;
2567 unsigned int buffer_size = 0;
2568 unsigned int nbchars = 0;
2569
2570 xmlChar *current = NULL;
2571 xmlEntityPtr ent;
2572 unsigned int max = (unsigned int) len;
2573 int c,l;
2574#endif
2575
2576 static int deprecated = 0;
2577 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002578 xmlGenericError(xmlGenericErrorContext,
2579 "xmlDecodeEntities() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002580 deprecated = 1;
2581 }
2582
2583#if 0
2584 if (ctxt->depth > 40) {
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Detected entity reference loop\n");
2588 ctxt->wellFormed = 0;
2589 ctxt->disableSAX = 1;
2590 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2591 return(NULL);
2592 }
2593
2594 /*
2595 * allocate a translation buffer.
2596 */
2597 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2598 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2599 if (buffer == NULL) {
2600 perror("xmlDecodeEntities: malloc failed");
2601 return(NULL);
2602 }
2603
2604 /*
2605 * Ok loop until we reach one of the ending char or a size limit.
2606 */
2607 GROW;
2608 c = CUR_CHAR(l);
2609 while ((nbchars < max) && (c != end) && /* NOTUSED */
2610 (c != end2) && (c != end3)) {
2611 GROW;
2612 if (c == 0) break;
2613 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2614 int val = xmlParseCharRef(ctxt);
2615 COPY_BUF(0,buffer,nbchars,val);
2616 NEXTL(l);
2617 } else if ((c == '&') && (ctxt->token != '&') &&
2618 (what & XML_SUBSTITUTE_REF)) {
2619 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002620 xmlGenericError(xmlGenericErrorContext,
2621 "decoding Entity Reference\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002622 ent = xmlParseEntityRef(ctxt);
2623 if ((ent != NULL) &&
2624 (ctxt->replaceEntities != 0)) {
2625 current = ent->content;
2626 while (*current != 0) { /* non input consuming loop */
2627 buffer[nbchars++] = *current++;
2628 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2629 growBuffer(buffer);
2630 }
2631 }
2632 } else if (ent != NULL) {
2633 const xmlChar *cur = ent->name;
2634
2635 buffer[nbchars++] = '&';
2636 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2637 growBuffer(buffer);
2638 }
2639 while (*cur != 0) { /* non input consuming loop */
2640 buffer[nbchars++] = *cur++;
2641 }
2642 buffer[nbchars++] = ';';
2643 }
2644 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2645 /*
2646 * a PEReference induce to switch the entity flow,
2647 * we break here to flush the current set of chars
2648 * parsed if any. We will be called back later.
2649 */
2650 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002651 xmlGenericError(xmlGenericErrorContext,
2652 "decoding PE Reference\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002653 if (nbchars != 0) break;
2654
2655 xmlParsePEReference(ctxt);
2656
2657 /*
2658 * Pop-up of finished entities.
2659 */
2660 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2661 xmlPopInput(ctxt);
2662
2663 break;
2664 } else {
2665 COPY_BUF(l,buffer,nbchars,c);
2666 NEXTL(l);
2667 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2668 growBuffer(buffer);
2669 }
2670 }
2671 c = CUR_CHAR(l);
2672 }
2673 buffer[nbchars++] = 0;
2674 return(buffer);
2675#endif
2676 return(NULL);
2677}
2678
2679/**
2680 * xmlNamespaceParseNCName:
2681 * @ctxt: an XML parser context
2682 *
2683 * parse an XML namespace name.
2684 *
2685 * TODO: this seems not in use anymore, the namespace handling is done on
2686 * top of the SAX interfaces, i.e. not on raw input.
2687 *
2688 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2689 *
2690 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2691 * CombiningChar | Extender
2692 *
2693 * Returns the namespace name or NULL
2694 */
2695
2696xmlChar *
2697xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2698#if 0
2699 xmlChar buf[XML_MAX_NAMELEN + 5];
2700 int len = 0, l;
2701 int cur = CUR_CHAR(l);
2702#endif
2703
2704 static int deprecated = 0;
2705 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002706 xmlGenericError(xmlGenericErrorContext,
2707 "xmlNamespaceParseNCName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002708 deprecated = 1;
2709 }
2710
2711#if 0
2712 /* load first the value of the char !!! */
2713 GROW;
2714 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2715
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002716xmlGenericError(xmlGenericErrorContext,
2717 "xmlNamespaceParseNCName: reached loop 3\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002718 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2719 (cur == '.') || (cur == '-') ||
2720 (cur == '_') ||
2721 (IS_COMBINING(cur)) ||
2722 (IS_EXTENDER(cur))) {
2723 COPY_BUF(l,buf,len,cur);
2724 NEXTL(l);
2725 cur = CUR_CHAR(l);
2726 if (len >= XML_MAX_NAMELEN) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002727 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +00002728 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2729 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2730 (cur == '.') || (cur == '-') ||
2731 (cur == '_') ||
2732 (IS_COMBINING(cur)) ||
2733 (IS_EXTENDER(cur))) {
2734 NEXTL(l);
2735 cur = CUR_CHAR(l);
2736 }
2737 break;
2738 }
2739 }
2740 return(xmlStrndup(buf, len));
2741#endif
2742 return(NULL);
2743}
2744
2745/**
2746 * xmlNamespaceParseQName:
2747 * @ctxt: an XML parser context
2748 * @prefix: a xmlChar **
2749 *
2750 * TODO: this seems not in use anymore, the namespace handling is done on
2751 * top of the SAX interfaces, i.e. not on raw input.
2752 *
2753 * parse an XML qualified name
2754 *
2755 * [NS 5] QName ::= (Prefix ':')? LocalPart
2756 *
2757 * [NS 6] Prefix ::= NCName
2758 *
2759 * [NS 7] LocalPart ::= NCName
2760 *
2761 * Returns the local part, and prefix is updated
2762 * to get the Prefix if any.
2763 */
2764
2765xmlChar *
2766xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2767
2768 static int deprecated = 0;
2769 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002770 xmlGenericError(xmlGenericErrorContext,
2771 "xmlNamespaceParseQName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002772 deprecated = 1;
2773 }
2774
2775#if 0
2776 xmlChar *ret = NULL;
2777
2778 *prefix = NULL;
2779 ret = xmlNamespaceParseNCName(ctxt);
2780 if (RAW == ':') {
2781 *prefix = ret;
2782 NEXT;
2783 ret = xmlNamespaceParseNCName(ctxt);
2784 }
2785
2786 return(ret);
2787#endif
2788 return(NULL);
2789}
2790
2791/**
2792 * xmlNamespaceParseNSDef:
2793 * @ctxt: an XML parser context
2794 *
2795 * parse a namespace prefix declaration
2796 *
2797 * TODO: this seems not in use anymore, the namespace handling is done on
2798 * top of the SAX interfaces, i.e. not on raw input.
2799 *
2800 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2801 *
2802 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2803 *
2804 * Returns the namespace name
2805 */
2806
2807xmlChar *
2808xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2809 static int deprecated = 0;
2810 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002811 xmlGenericError(xmlGenericErrorContext,
2812 "xmlNamespaceParseNSDef() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002813 deprecated = 1;
2814 }
2815 return(NULL);
2816#if 0
2817 xmlChar *name = NULL;
2818
2819 if ((RAW == 'x') && (NXT(1) == 'm') &&
2820 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2821 (NXT(4) == 's')) {
2822 SKIP(5);
2823 if (RAW == ':') {
2824 NEXT;
2825 name = xmlNamespaceParseNCName(ctxt);
2826 }
2827 }
2828 return(name);
2829#endif
2830}
2831
2832/**
2833 * xmlParseQuotedString:
2834 * @ctxt: an XML parser context
2835 *
2836 * Parse and return a string between quotes or doublequotes
2837 *
2838 * TODO: Deprecated, to be removed at next drop of binary compatibility
2839 *
2840 * Returns the string parser or NULL.
2841 */
2842xmlChar *
2843xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2844 static int deprecated = 0;
2845 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002846 xmlGenericError(xmlGenericErrorContext,
2847 "xmlParseQuotedString() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002848 deprecated = 1;
2849 }
2850 return(NULL);
2851
2852#if 0
2853 xmlChar *buf = NULL;
2854 int len = 0,l;
2855 int size = XML_PARSER_BUFFER_SIZE;
2856 int c;
2857
2858 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2859 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002860 xmlGenericError(xmlGenericErrorContext,
2861 "malloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002862 return(NULL);
2863 }
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002864xmlGenericError(xmlGenericErrorContext,
2865 "xmlParseQuotedString: reached loop 4\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002866 if (RAW == '"') {
2867 NEXT;
2868 c = CUR_CHAR(l);
2869 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2870 if (len + 5 >= size) {
2871 size *= 2;
2872 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2873 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002874 xmlGenericError(xmlGenericErrorContext,
2875 "realloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002876 return(NULL);
2877 }
2878 }
2879 COPY_BUF(l,buf,len,c);
2880 NEXTL(l);
2881 c = CUR_CHAR(l);
2882 }
2883 if (c != '"') {
2884 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2886 ctxt->sax->error(ctxt->userData,
2887 "String not closed \"%.50s\"\n", buf);
2888 ctxt->wellFormed = 0;
2889 ctxt->disableSAX = 1;
2890 } else {
2891 NEXT;
2892 }
2893 } else if (RAW == '\''){
2894 NEXT;
2895 c = CUR;
2896 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2897 if (len + 1 >= size) {
2898 size *= 2;
2899 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2900 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002901 xmlGenericError(xmlGenericErrorContext,
2902 "realloc of %d byte failed\n", size);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002903 return(NULL);
2904 }
2905 }
2906 buf[len++] = c;
2907 NEXT;
2908 c = CUR;
2909 }
2910 if (RAW != '\'') {
2911 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2913 ctxt->sax->error(ctxt->userData,
2914 "String not closed \"%.50s\"\n", buf);
2915 ctxt->wellFormed = 0;
2916 ctxt->disableSAX = 1;
2917 } else {
2918 NEXT;
2919 }
2920 }
2921 return(buf);
2922#endif
2923}
2924
2925/**
2926 * xmlParseNamespace:
2927 * @ctxt: an XML parser context
2928 *
2929 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2930 *
2931 * This is what the older xml-name Working Draft specified, a bunch of
2932 * other stuff may still rely on it, so support is still here as
2933 * if it was declared on the root of the Tree:-(
2934 *
2935 * TODO: remove from library
2936 *
2937 * To be removed at next drop of binary compatibility
2938 */
2939
2940void
2941xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2942 static int deprecated = 0;
2943 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002944 xmlGenericError(xmlGenericErrorContext,
2945 "xmlParseNamespace() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002946 deprecated = 1;
2947 }
2948
2949#if 0
2950 xmlChar *href = NULL;
2951 xmlChar *prefix = NULL;
2952 int garbage = 0;
2953
2954 /*
2955 * We just skipped "namespace" or "xml:namespace"
2956 */
2957 SKIP_BLANKS;
2958
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002959xmlGenericError(xmlGenericErrorContext,
2960 "xmlParseNamespace: reached loop 5\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00002961 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2962 /*
2963 * We can have "ns" or "prefix" attributes
2964 * Old encoding as 'href' or 'AS' attributes is still supported
2965 */
2966 if ((RAW == 'n') && (NXT(1) == 's')) {
2967 garbage = 0;
2968 SKIP(2);
2969 SKIP_BLANKS;
2970
2971 if (RAW != '=') continue;
2972 NEXT;
2973 SKIP_BLANKS;
2974
2975 href = xmlParseQuotedString(ctxt);
2976 SKIP_BLANKS;
2977 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2978 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2979 garbage = 0;
2980 SKIP(4);
2981 SKIP_BLANKS;
2982
2983 if (RAW != '=') continue;
2984 NEXT;
2985 SKIP_BLANKS;
2986
2987 href = xmlParseQuotedString(ctxt);
2988 SKIP_BLANKS;
2989 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2990 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2991 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2992 garbage = 0;
2993 SKIP(6);
2994 SKIP_BLANKS;
2995
2996 if (RAW != '=') continue;
2997 NEXT;
2998 SKIP_BLANKS;
2999
3000 prefix = xmlParseQuotedString(ctxt);
3001 SKIP_BLANKS;
3002 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3003 garbage = 0;
3004 SKIP(2);
3005 SKIP_BLANKS;
3006
3007 if (RAW != '=') continue;
3008 NEXT;
3009 SKIP_BLANKS;
3010
3011 prefix = xmlParseQuotedString(ctxt);
3012 SKIP_BLANKS;
3013 } else if ((RAW == '?') && (NXT(1) == '>')) {
3014 garbage = 0;
3015 NEXT;
3016 } else {
3017 /*
3018 * Found garbage when parsing the namespace
3019 */
3020 if (!garbage) {
3021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3022 ctxt->sax->error(ctxt->userData,
3023 "xmlParseNamespace found garbage\n");
3024 }
3025 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3026 ctxt->wellFormed = 0;
3027 ctxt->disableSAX = 1;
3028 NEXT;
3029 }
3030 }
3031
3032 MOVETO_ENDTAG(CUR_PTR);
3033 NEXT;
3034
3035 /*
3036 * Register the DTD.
3037 if (href != NULL)
3038 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3039 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3040 */
3041
3042 if (prefix != NULL) xmlFree(prefix);
3043 if (href != NULL) xmlFree(href);
3044#endif
3045}
3046
3047/**
3048 * xmlScanName:
3049 * @ctxt: an XML parser context
3050 *
3051 * Trickery: parse an XML name but without consuming the input flow
3052 * Needed for rollback cases. Used only when parsing entities references.
3053 *
3054 * TODO: seems deprecated now, only used in the default part of
3055 * xmlParserHandleReference
3056 *
3057 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3058 * CombiningChar | Extender
3059 *
3060 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3061 *
3062 * [6] Names ::= Name (S Name)*
3063 *
3064 * Returns the Name parsed or NULL
3065 */
3066
3067xmlChar *
3068xmlScanName(xmlParserCtxtPtr ctxt) {
3069 static int deprecated = 0;
3070 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003071 xmlGenericError(xmlGenericErrorContext,
3072 "xmlScanName() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003073 deprecated = 1;
3074 }
3075 return(NULL);
3076
3077#if 0
3078 xmlChar buf[XML_MAX_NAMELEN];
3079 int len = 0;
3080
3081 GROW;
3082 if (!IS_LETTER(RAW) && (RAW != '_') &&
3083 (RAW != ':')) {
3084 return(NULL);
3085 }
3086
3087
3088 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3089 (NXT(len) == '.') || (NXT(len) == '-') ||
3090 (NXT(len) == '_') || (NXT(len) == ':') ||
3091 (IS_COMBINING(NXT(len))) ||
3092 (IS_EXTENDER(NXT(len)))) {
3093 GROW;
3094 buf[len] = NXT(len);
3095 len++;
3096 if (len >= XML_MAX_NAMELEN) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003097 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardb1059e22000-09-16 14:02:43 +00003098 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3099 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3100 (IS_DIGIT(NXT(len))) ||
3101 (NXT(len) == '.') || (NXT(len) == '-') ||
3102 (NXT(len) == '_') || (NXT(len) == ':') ||
3103 (IS_COMBINING(NXT(len))) ||
3104 (IS_EXTENDER(NXT(len))))
3105 len++;
3106 break;
3107 }
3108 }
3109 return(xmlStrndup(buf, len));
3110#endif
3111}
3112
3113/**
3114 * xmlParserHandleReference:
3115 * @ctxt: the parser context
3116 *
3117 * TODO: Remove, now deprecated ... the test is done directly in the
3118 * content parsing
3119 * routines.
3120 *
3121 * [67] Reference ::= EntityRef | CharRef
3122 *
3123 * [68] EntityRef ::= '&' Name ';'
3124 *
3125 * [ WFC: Entity Declared ]
3126 * the Name given in the entity reference must match that in an entity
3127 * declaration, except that well-formed documents need not declare any
3128 * of the following entities: amp, lt, gt, apos, quot.
3129 *
3130 * [ WFC: Parsed Entity ]
3131 * An entity reference must not contain the name of an unparsed entity
3132 *
3133 * [66] CharRef ::= '&#' [0-9]+ ';' |
3134 * '&#x' [0-9a-fA-F]+ ';'
3135 *
3136 * A PEReference may have been detectect in the current input stream
3137 * the handling is done accordingly to
3138 * http://www.w3.org/TR/REC-xml#entproc
3139 */
3140void
3141xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3142 static int deprecated = 0;
3143 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003144 xmlGenericError(xmlGenericErrorContext,
3145 "xmlParserHandleReference() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003146 deprecated = 1;
3147 }
3148
3149#if 0
3150 xmlParserInputPtr input;
3151 xmlChar *name;
3152 xmlEntityPtr ent = NULL;
3153
3154 if (ctxt->token != 0) {
3155 return;
3156 }
3157 if (RAW != '&') return;
3158 GROW;
3159 if ((RAW == '&') && (NXT(1) == '#')) {
3160 switch(ctxt->instate) {
3161 case XML_PARSER_ENTITY_DECL:
3162 case XML_PARSER_PI:
3163 case XML_PARSER_CDATA_SECTION:
3164 case XML_PARSER_COMMENT:
3165 case XML_PARSER_SYSTEM_LITERAL:
3166 /* we just ignore it there */
3167 return;
3168 case XML_PARSER_START_TAG:
3169 return;
3170 case XML_PARSER_END_TAG:
3171 return;
3172 case XML_PARSER_EOF:
3173 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 return;
3179 case XML_PARSER_PROLOG:
3180 case XML_PARSER_START:
3181 case XML_PARSER_MISC:
3182 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3185 ctxt->wellFormed = 0;
3186 ctxt->disableSAX = 1;
3187 return;
3188 case XML_PARSER_EPILOG:
3189 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3191 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3192 ctxt->wellFormed = 0;
3193 ctxt->disableSAX = 1;
3194 return;
3195 case XML_PARSER_DTD:
3196 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3198 ctxt->sax->error(ctxt->userData,
3199 "CharRef are forbiden in DTDs!\n");
3200 ctxt->wellFormed = 0;
3201 ctxt->disableSAX = 1;
3202 return;
3203 case XML_PARSER_ENTITY_VALUE:
3204 /*
3205 * NOTE: in the case of entity values, we don't do the
3206 * substitution here since we need the literal
3207 * entity value to be able to save the internal
3208 * subset of the document.
3209 * This will be handled by xmlStringDecodeEntities
3210 */
3211 return;
3212 case XML_PARSER_CONTENT:
3213 return;
3214 case XML_PARSER_ATTRIBUTE_VALUE:
3215 /* ctxt->token = xmlParseCharRef(ctxt); */
3216 return;
Daniel Veillard41e06512000-11-13 11:47:47 +00003217 case XML_PARSER_IGNORE:
3218 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00003219 }
3220 return;
3221 }
3222
3223 switch(ctxt->instate) {
3224 case XML_PARSER_CDATA_SECTION:
3225 return;
3226 case XML_PARSER_PI:
3227 case XML_PARSER_COMMENT:
3228 case XML_PARSER_SYSTEM_LITERAL:
3229 case XML_PARSER_CONTENT:
3230 return;
3231 case XML_PARSER_START_TAG:
3232 return;
3233 case XML_PARSER_END_TAG:
3234 return;
3235 case XML_PARSER_EOF:
3236 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3239 ctxt->wellFormed = 0;
3240 ctxt->disableSAX = 1;
3241 return;
3242 case XML_PARSER_PROLOG:
3243 case XML_PARSER_START:
3244 case XML_PARSER_MISC:
3245 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3248 ctxt->wellFormed = 0;
3249 ctxt->disableSAX = 1;
3250 return;
3251 case XML_PARSER_EPILOG:
3252 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 return;
3258 case XML_PARSER_ENTITY_VALUE:
3259 /*
3260 * NOTE: in the case of entity values, we don't do the
3261 * substitution here since we need the literal
3262 * entity value to be able to save the internal
3263 * subset of the document.
3264 * This will be handled by xmlStringDecodeEntities
3265 */
3266 return;
3267 case XML_PARSER_ATTRIBUTE_VALUE:
3268 /*
3269 * NOTE: in the case of attributes values, we don't do the
3270 * substitution here unless we are in a mode where
3271 * the parser is explicitely asked to substitute
3272 * entities. The SAX callback is called with values
3273 * without entity substitution.
3274 * This will then be handled by xmlStringDecodeEntities
3275 */
3276 return;
3277 case XML_PARSER_ENTITY_DECL:
3278 /*
3279 * we just ignore it there
3280 * the substitution will be done once the entity is referenced
3281 */
3282 return;
3283 case XML_PARSER_DTD:
3284 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3286 ctxt->sax->error(ctxt->userData,
3287 "Entity references are forbiden in DTDs!\n");
3288 ctxt->wellFormed = 0;
3289 ctxt->disableSAX = 1;
3290 return;
Daniel Veillard41e06512000-11-13 11:47:47 +00003291 case XML_PARSER_IGNORE:
3292 return;
Daniel Veillardb1059e22000-09-16 14:02:43 +00003293 }
3294
3295/* TODO: this seems not reached anymore .... Verify ... */
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003296xmlGenericError(xmlGenericErrorContext,
3297 "Reached deprecated section in xmlParserHandleReference()\n");
3298xmlGenericError(xmlGenericErrorContext,
3299 "Please forward the document to Daniel.Veillard@w3.org\n");
3300xmlGenericError(xmlGenericErrorContext,
3301 "indicating the version: %s, thanks !\n", xmlParserVersion);
Daniel Veillardb1059e22000-09-16 14:02:43 +00003302 NEXT;
3303 name = xmlScanName(ctxt);
3304 if (name == NULL) {
3305 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3307 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3308 ctxt->wellFormed = 0;
3309 ctxt->disableSAX = 1;
3310 ctxt->token = '&';
3311 return;
3312 }
3313 if (NXT(xmlStrlen(name)) != ';') {
3314 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3316 ctxt->sax->error(ctxt->userData,
3317 "Entity reference: ';' expected\n");
3318 ctxt->wellFormed = 0;
3319 ctxt->disableSAX = 1;
3320 ctxt->token = '&';
3321 xmlFree(name);
3322 return;
3323 }
3324 SKIP(xmlStrlen(name) + 1);
3325 if (ctxt->sax != NULL) {
3326 if (ctxt->sax->getEntity != NULL)
3327 ent = ctxt->sax->getEntity(ctxt->userData, name);
3328 }
3329
3330 /*
3331 * [ WFC: Entity Declared ]
3332 * the Name given in the entity reference must match that in an entity
3333 * declaration, except that well-formed documents need not declare any
3334 * of the following entities: amp, lt, gt, apos, quot.
3335 */
3336 if (ent == NULL)
3337 ent = xmlGetPredefinedEntity(name);
3338 if (ent == NULL) {
3339 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3341 ctxt->sax->error(ctxt->userData,
3342 "Entity reference: entity %s not declared\n",
3343 name);
3344 ctxt->wellFormed = 0;
3345 ctxt->disableSAX = 1;
3346 xmlFree(name);
3347 return;
3348 }
3349
3350 /*
3351 * [ WFC: Parsed Entity ]
3352 * An entity reference must not contain the name of an unparsed entity
3353 */
3354 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3355 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
3358 "Entity reference to unparsed entity %s\n", name);
3359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 }
3362
3363 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3364 ctxt->token = ent->content[0];
3365 xmlFree(name);
3366 return;
3367 }
3368 input = xmlNewEntityInputStream(ctxt, ent);
3369 xmlPushInput(ctxt, input);
3370 xmlFree(name);
3371#endif
3372 return;
3373}
3374
3375/**
3376 * xmlHandleEntity:
3377 * @ctxt: an XML parser context
3378 * @entity: an XML entity pointer.
3379 *
3380 * Default handling of defined entities, when should we define a new input
3381 * stream ? When do we just handle that as a set of chars ?
3382 *
3383 * OBSOLETE: to be removed at some point.
3384 */
3385
3386void
3387xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3388 static int deprecated = 0;
3389 if (!deprecated) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00003390 xmlGenericError(xmlGenericErrorContext,
3391 "xmlHandleEntity() deprecated function reached\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00003392 deprecated = 1;
3393 }
3394
3395#if 0
3396 int len;
3397 xmlParserInputPtr input;
3398
3399 if (entity->content == NULL) {
3400 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3402 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3403 entity->name);
3404 ctxt->wellFormed = 0;
3405 ctxt->disableSAX = 1;
3406 return;
3407 }
3408 len = xmlStrlen(entity->content);
3409 if (len <= 2) goto handle_as_char;
3410
3411 /*
3412 * Redefine its content as an input stream.
3413 */
3414 input = xmlNewEntityInputStream(ctxt, entity);
3415 xmlPushInput(ctxt, input);
3416 return;
3417
3418handle_as_char:
3419 /*
3420 * Just handle the content as a set of chars.
3421 */
3422 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3423 (ctxt->sax->characters != NULL))
3424 ctxt->sax->characters(ctxt->userData, entity->content, len);
3425#endif
3426}
3427
Daniel Veillarda4964b72000-10-31 18:23:44 +00003428/**
3429 * xmlNewGlobalNs:
3430 * @doc: the document carrying the namespace
3431 * @href: the URI associated
3432 * @prefix: the prefix for the namespace
3433 *
3434 * Creation of a Namespace, the old way using PI and without scoping
3435 * DEPRECATED !!!
3436 * It now create a namespace on the root element of the document if found.
3437 * Returns NULL this functionnality had been removed
3438 */
3439xmlNsPtr
3440xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3441 static int deprecated = 0;
3442 if (!deprecated) {
3443 xmlGenericError(xmlGenericErrorContext,
3444 "xmlNewGlobalNs() deprecated function reached\n");
3445 deprecated = 1;
3446 }
3447 return(NULL);
3448#if 0
3449 xmlNodePtr root;
3450
3451 xmlNsPtr cur;
3452
3453 root = xmlDocGetRootElement(doc);
3454 if (root != NULL)
3455 return(xmlNewNs(root, href, prefix));
3456
3457 /*
3458 * if there is no root element yet, create an old Namespace type
3459 * and it will be moved to the root at save time.
3460 */
3461 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3462 if (cur == NULL) {
3463 xmlGenericError(xmlGenericErrorContext,
3464 "xmlNewGlobalNs : malloc failed\n");
3465 return(NULL);
3466 }
3467 memset(cur, 0, sizeof(xmlNs));
3468 cur->type = XML_GLOBAL_NAMESPACE;
3469
3470 if (href != NULL)
3471 cur->href = xmlStrdup(href);
3472 if (prefix != NULL)
3473 cur->prefix = xmlStrdup(prefix);
3474
3475 /*
3476 * Add it at the end to preserve parsing order ...
3477 */
3478 if (doc != NULL) {
3479 if (doc->oldNs == NULL) {
3480 doc->oldNs = cur;
3481 } else {
3482 xmlNsPtr prev = doc->oldNs;
3483
3484 while (prev->next != NULL) prev = prev->next;
3485 prev->next = cur;
3486 }
3487 }
3488
3489 return(NULL);
3490#endif
3491}
3492
3493/**
3494 * xmlUpgradeOldNs:
3495 * @doc: a document pointer
3496 *
3497 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3498 * DEPRECATED
3499 */
3500void
3501xmlUpgradeOldNs(xmlDocPtr doc) {
3502 static int deprecated = 0;
3503 if (!deprecated) {
3504 xmlGenericError(xmlGenericErrorContext,
3505 "xmlNewGlobalNs() deprecated function reached\n");
3506 deprecated = 1;
3507 }
3508#if 0
3509 xmlNsPtr cur;
3510
3511 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3512 if (doc->children == NULL) {
3513#ifdef DEBUG_TREE
3514 xmlGenericError(xmlGenericErrorContext,
3515 "xmlUpgradeOldNs: failed no root !\n");
3516#endif
3517 return;
3518 }
3519
3520 cur = doc->oldNs;
3521 while (cur->next != NULL) {
3522 cur->type = XML_LOCAL_NAMESPACE;
3523 cur = cur->next;
3524 }
3525 cur->type = XML_LOCAL_NAMESPACE;
3526 cur->next = doc->children->nsDef;
3527 doc->children->nsDef = doc->oldNs;
3528 doc->oldNs = NULL;
3529#endif
3530}
3531