blob: fb0c38aa2017b1cb2480f947ed17c5cc22d945bc [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 character codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * One entry of the encoding alias table: an encoding name together with
 * an alias string for it.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias, *xmlCharEncodingAliasPtr;

/*
 * The alias table itself, initially empty.
 * NOTE(review): Nb/Max are presumably the number of entries in use and the
 * allocated capacity -- confirm against the alias management routines.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
60
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
/* Change the "#if 0" below to "#if 1" to get encoding traces. */
#  if 0
#    define DEBUG_ENCODING  /* Define this to get encoding traces */
#  endif
#else
/*
 * Neither iconv nor ICU is enabled: forward-declare the ISO-8859-x
 * handler registration routine when that support is compiled in.
 */
#  ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#  endif
#endif
70
/* Byte-order flag: non-zero means little-endian.  Defaults to 1. */
static int xmlLittleEndian = 1;
72
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000073/**
74 * xmlEncodingErrMemory:
75 * @extra: extra informations
76 *
77 * Handle an out of memory condition
78 */
79static void
80xmlEncodingErrMemory(const char *extra)
81{
82 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
83}
84
85/**
86 * xmlErrEncoding:
87 * @error: the error number
88 * @msg: the error message
89 *
90 * n encoding error
91 */
92static void
93xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
94{
95 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96 XML_FROM_I18N, error, XML_ERR_FATAL,
97 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
98}
Daniel Veillard97ac1312001-05-30 19:14:17 +000099
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100100#ifdef LIBXML_ICU_ENABLED
101static uconv_t*
102openIcuConverter(const char* name, int toUnicode)
103{
104 UErrorCode status = U_ZERO_ERROR;
105 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
106 if (conv == NULL)
107 return NULL;
108
109 conv->uconv = ucnv_open(name, &status);
110 if (U_FAILURE(status))
111 goto error;
112
113 status = U_ZERO_ERROR;
114 if (toUnicode) {
115 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
116 NULL, NULL, NULL, &status);
117 }
118 else {
119 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
121 }
122 if (U_FAILURE(status))
123 goto error;
124
125 status = U_ZERO_ERROR;
126 conv->utf8 = ucnv_open("UTF-8", &status);
127 if (U_SUCCESS(status))
128 return conv;
129
130error:
131 if (conv->uconv)
132 ucnv_close(conv->uconv);
133 xmlFree(conv);
134 return NULL;
135}
136
137static void
138closeIcuConverter(uconv_t *conv)
139{
140 if (conv != NULL) {
141 ucnv_close(conv->uconv);
142 ucnv_close(conv->utf8);
143 xmlFree(conv);
144 }
145}
146#endif /* LIBXML_ICU_ENABLED */
147
/************************************************************************
 *									*
 *		Conversions To/From UTF8 encoding			*
 *									*
 ************************************************************************/
153
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  Each ASCII byte maps to exactly one output byte.
 * Conversion only proceeds while more than five spare bytes remain
 * in @out.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is found.
 * On return @outlen holds the number of octets written and @inlen the
 * number of octets consumed, including on the -1 path.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int room = *outlen;
    const int avail = *inlen;
    int n = 0;      /* bytes copied so far: input and output move in lockstep */

    while ((n < avail) && (n + 5 < room)) {
        unsigned int c = in[n];

        if (c >= 0x80) {
            /* not ASCII: report what was converted before the bad byte */
            *outlen = n;
            *inlen = n;
            return(-1);
        }
        out[n] = (unsigned char) c;
        n++;
    }

    *outlen = n;
    *inlen = n;
    return(n);
}
198
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000199#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is written and 0 is
 * returned.  A multi-byte sequence cut off by the end of @in is left
 * unconsumed so that the caller can supply the rest later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed UTF-8 or a character outside ASCII), or -1 if @out,
 * @outlen or @inlen is NULL.
 * On return @outlen holds the number of octets written and @inlen the
 * number of octets consumed, also on the -2 path.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto failed;
        }

        /* sequence cut off by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not 10xxxxxx cannot continue the sequence.
             * It must be rejected, not silently swallowed, otherwise a
             * bogus character built from a partial sequence is emitted.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto failed;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report progress up to the last fully converted character */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000283#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000284
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied as is, the others
 * become a two-byte UTF-8 sequence.  Conversion stops at the first
 * character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 * is NULL.
 * On return @outlen holds the number of octets written and @inlen the
 * number of octets consumed.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /*
             * Two bytes are needed.  Compare the remaining room rather
             * than forming "outend - 1", which would point before the
             * buffer when it is empty.
             */
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        in++;
    }

    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
333
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 * the number of bytes to copy is negative.
 * On success both *@outlen and *@inlenb are set to the number of bytes
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if (!out || !inb || !outlen || !inlenb)
        return(-1);

    ncopy = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (ncopy < 0)
        return(-1);

    memcpy(out, inb, ncopy);

    *outlen = ncopy;
    *inlenb = ncopy;
    return(ncopy);
}
369
Daniel Veillarde72c7562002-05-31 09:47:30 +0000370
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000371#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is written and 0 is
 * returned.  Conversion stops early, without error, when @out is full or
 * when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails, or -1 if @out, @outlen or @inlen is NULL.
 * On return @outlen holds the number of octets written and @inlen the
 * number of octets consumed, also on the -2 path.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src, *srcEnd, *committed;
    const unsigned char *dstEnd;
    unsigned char *dst;
    int status = 0;     /* becomes -2 as soon as the input is rejected */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int lead = *src++;
        unsigned int code;
        int extra;

        if (lead < 0x80) {
            code = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            code = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            code = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            code = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }

        /* incomplete sequence at the end of the input */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            code = (code << 6) | (cont & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* assertion: code is a single UTF-4 value */
        if (code > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = (unsigned char) code;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    if (status != 0)
        return(status);
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000461#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000462
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is decoded byte by byte (low byte
 * first), so the result depends neither on the byte order of the host
 * nor on the alignment of @inb.
 *
 * An odd *@inlenb is rounded down to a whole number of code units.
 * Conversion only proceeds while more than five spare bytes remain
 * in @out.  A high surrogate that is the last code unit of the input is
 * left unconsumed so that the caller can supply the rest later.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 * (a high surrogate not followed by a low surrogate).
 * On return @outlen holds the number of octets written and @inlenb the
 * number of octets consumed, also on the -2 path.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        /* read one little-endian code unit without type punning */
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split by the end of the input: wait for more */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if (out >= outend)
            break;
        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }

        for ( ; bits >= 0; bits -= 6) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
550
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000551#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  No byte order mark is emitted.  The output is
 * stored byte by byte (low byte first), so the result depends neither on
 * the byte order of the host nor on the alignment of @outb.
 *
 * A NULL @in is an initialization call: nothing is written and 0 is
 * returned.  Conversion stops early, without error, when the next
 * character does not fit in @outb or when the last sequence of @in is
 * incomplete.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 * (malformed UTF-8), or -1 if @outb, @outlen or @inlen is NULL.
 * On return @outlen holds the number of octets written and @inlen the
 * number of octets consumed, also on the -2 path.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* sequence cut off by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not 10xxxxxx cannot continue the sequence;
             * reject it, otherwise a bogus character is emitted and the
             * offending byte is swallowed.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) ((c >> 8) & 0xFF);
        } else if (c < 0x110000) {
            /* a surrogate pair needs two code units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /*
             * NOTE(review): values above 0x10FFFF stop the conversion
             * without an error code, as before -- confirm whether
             * callers would be better served by -2 here.
             */
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report progress up to the last fully converted character */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
660
661/**
William M. Brackf9415e42003-11-28 09:39:10 +0000662 * UTF8ToUTF16:
663 * @outb: a pointer to an array of bytes to store the result
664 * @outlen: the length of @outb
665 * @in: a pointer to an array of UTF-8 chars
666 * @inlen: the length of @in
667 *
668 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
669 * block of chars out.
670 *
671 * Returns the number of bytes written, or -1 if lack of space, or -2
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100672 * if the transcoding failed.
William M. Brackf9415e42003-11-28 09:39:10 +0000673 */
674static int
675UTF8ToUTF16(unsigned char* outb, int *outlen,
676 const unsigned char* in, int *inlen)
677{
678 if (in == NULL) {
679 /*
680 * initialization, add the Byte Order Mark for UTF-16LE
681 */
682 if (*outlen >= 2) {
683 outb[0] = 0xFF;
684 outb[1] = 0xFE;
685 *outlen = 2;
686 *inlen = 0;
687#ifdef DEBUG_ENCODING
688 xmlGenericError(xmlGenericErrorContext,
689 "Added FFFE Byte Order Mark\n");
690#endif
691 return(2);
692 }
693 *outlen = 0;
694 *inlen = 0;
695 return(0);
696 }
697 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
698}
William M. Brack030a7a12004-02-10 12:48:57 +0000699#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000700
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is decoded byte by byte (high
 * byte first), so the result depends neither on the byte order of the
 * host nor on the alignment of @inb.
 *
 * An odd *@inlenb is rounded down to a whole number of code units.
 * A character is only written when its complete UTF-8 sequence fits
 * in @out; otherwise conversion stops before it.  A high surrogate that
 * is the last code unit of the input is left unconsumed so that the
 * caller can supply the rest later.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 * (a high surrogate not followed by a low surrogate).
 * On return @outlen holds the number of octets written and @inlenb the
 * number of octets consumed, also on the -2 path.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits, need;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        /* read one big-endian code unit without type punning */
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split by the end of the input: wait for more */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /*
         * Never emit a truncated sequence: either the whole character
         * fits, or it stays unconsumed for the next call.
         */
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                break;
        }
        /* continuation bytes, six payload bits each */
        for (bits = (need - 2) * 6; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
792
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000793#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  No byte order mark is emitted.  The output is
 * stored byte by byte (high byte first), so the result depends neither
 * on the byte order of the host nor on the alignment of @outb.
 *
 * A NULL @in is an initialization call: nothing is written and 0 is
 * returned.  Conversion stops early, without error, when the next
 * character does not fit in @outb or when the last sequence of @in is
 * incomplete.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 * (malformed UTF-8), or -1 if @outb, @outlen or @inlen is NULL.
 * On return @outlen holds the number of octets written and @inlen the
 * number of octets consumed, also on the -2 path.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* sequence cut off by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not 10xxxxxx cannot continue the sequence;
             * reject it, otherwise a bogus character is emitted and the
             * offending byte is swallowed.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) ((c >> 8) & 0xFF);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* a surrogate pair needs two code units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /*
             * NOTE(review): values above 0x10FFFF stop the conversion
             * without an error code, as before -- confirm whether
             * callers would be better served by -2 here.
             */
            break;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /*
     * Report progress up to the last fully converted character.  The
     * output length is counted in bytes here as well, not in 16-bit
     * units.
     */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000899#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000900
Daniel Veillard97ac1312001-05-30 19:14:17 +0000901/************************************************************************
902 * *
903 * Generic encoding handling routines *
904 * *
905 ************************************************************************/
906
Owen Taylor3473f882001-02-23 17:55:21 +0000907/**
908 * xmlDetectCharEncoding:
909 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000910 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000911 * @len: pointer to the length of the buffer
912 *
913 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000914 * according to the non-normative appendix F of the XML-1.0 recommendation.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100915 *
Owen Taylor3473f882001-02-23 17:55:21 +0000916 * Returns one of the XML_CHAR_ENCODING_... values.
917 */
918xmlCharEncoding
919xmlDetectCharEncoding(const unsigned char* in, int len)
920{
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100921 if (in == NULL)
Daniel Veillardce682bc2004-11-05 17:22:25 +0000922 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000923 if (len >= 4) {
924 if ((in[0] == 0x00) && (in[1] == 0x00) &&
925 (in[2] == 0x00) && (in[3] == 0x3C))
926 return(XML_CHAR_ENCODING_UCS4BE);
927 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
928 (in[2] == 0x00) && (in[3] == 0x00))
929 return(XML_CHAR_ENCODING_UCS4LE);
930 if ((in[0] == 0x00) && (in[1] == 0x00) &&
931 (in[2] == 0x3C) && (in[3] == 0x00))
932 return(XML_CHAR_ENCODING_UCS4_2143);
933 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
934 (in[2] == 0x00) && (in[3] == 0x00))
935 return(XML_CHAR_ENCODING_UCS4_3412);
936 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
937 (in[2] == 0xA7) && (in[3] == 0x94))
938 return(XML_CHAR_ENCODING_EBCDIC);
939 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
940 (in[2] == 0x78) && (in[3] == 0x6D))
941 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000942 /*
943 * Although not part of the recommendation, we also
944 * attempt an "auto-recognition" of UTF-16LE and
945 * UTF-16BE encodings.
946 */
947 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
948 (in[2] == 0x3F) && (in[3] == 0x00))
949 return(XML_CHAR_ENCODING_UTF16LE);
950 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
951 (in[2] == 0x00) && (in[3] == 0x3F))
952 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000953 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000954 if (len >= 3) {
955 /*
956 * Errata on XML-1.0 June 20 2001
957 * We now allow an UTF8 encoded BOM
958 */
959 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
960 (in[2] == 0xBF))
961 return(XML_CHAR_ENCODING_UTF8);
962 }
William M. Brackf9415e42003-11-28 09:39:10 +0000963 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000964 if (len >= 2) {
965 if ((in[0] == 0xFE) && (in[1] == 0xFF))
966 return(XML_CHAR_ENCODING_UTF16BE);
967 if ((in[0] == 0xFF) && (in[1] == 0xFE))
968 return(XML_CHAR_ENCODING_UTF16LE);
969 }
970 return(XML_CHAR_ENCODING_NONE);
971}
972
973/**
974 * xmlCleanupEncodingAliases:
975 *
976 * Unregisters all aliases
977 */
978void
979xmlCleanupEncodingAliases(void) {
980 int i;
981
982 if (xmlCharEncodingAliases == NULL)
983 return;
984
985 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
986 if (xmlCharEncodingAliases[i].name != NULL)
987 xmlFree((char *) xmlCharEncodingAliases[i].name);
988 if (xmlCharEncodingAliases[i].alias != NULL)
989 xmlFree((char *) xmlCharEncodingAliases[i].alias);
990 }
991 xmlCharEncodingAliasesNb = 0;
992 xmlCharEncodingAliasesMax = 0;
993 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000994 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000995}
996
997/**
998 * xmlGetEncodingAlias:
999 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1000 *
1001 * Lookup an encoding name for the given alias.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001002 *
William M. Brackf9415e42003-11-28 09:39:10 +00001003 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +00001004 */
1005const char *
1006xmlGetEncodingAlias(const char *alias) {
1007 int i;
1008 char upper[100];
1009
1010 if (alias == NULL)
1011 return(NULL);
1012
1013 if (xmlCharEncodingAliases == NULL)
1014 return(NULL);
1015
1016 for (i = 0;i < 99;i++) {
1017 upper[i] = toupper(alias[i]);
1018 if (upper[i] == 0) break;
1019 }
1020 upper[i] = 0;
1021
1022 /*
1023 * Walk down the list looking for a definition of the alias
1024 */
1025 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027 return(xmlCharEncodingAliases[i].name);
1028 }
1029 }
1030 return(NULL);
1031}
1032
1033/**
1034 * xmlAddEncodingAlias:
1035 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1036 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1037 *
William M. Brackf9415e42003-11-28 09:39:10 +00001038 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +00001039 * will be overwritten.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001040 *
Owen Taylor3473f882001-02-23 17:55:21 +00001041 * Returns 0 in case of success, -1 in case of error
1042 */
1043int
1044xmlAddEncodingAlias(const char *name, const char *alias) {
1045 int i;
1046 char upper[100];
1047
1048 if ((name == NULL) || (alias == NULL))
1049 return(-1);
1050
1051 for (i = 0;i < 99;i++) {
1052 upper[i] = toupper(alias[i]);
1053 if (upper[i] == 0) break;
1054 }
1055 upper[i] = 0;
1056
1057 if (xmlCharEncodingAliases == NULL) {
1058 xmlCharEncodingAliasesNb = 0;
1059 xmlCharEncodingAliasesMax = 20;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001060 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001061 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1062 if (xmlCharEncodingAliases == NULL)
1063 return(-1);
1064 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1065 xmlCharEncodingAliasesMax *= 2;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001066 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001067 xmlRealloc(xmlCharEncodingAliases,
1068 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1069 }
1070 /*
1071 * Walk down the list looking for a definition of the alias
1072 */
1073 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075 /*
1076 * Replace the definition.
1077 */
1078 xmlFree((char *) xmlCharEncodingAliases[i].name);
1079 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1080 return(0);
1081 }
1082 }
1083 /*
1084 * Add the definition
1085 */
1086 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1087 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1088 xmlCharEncodingAliasesNb++;
1089 return(0);
1090}
1091
1092/**
1093 * xmlDelEncodingAlias:
1094 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1095 *
1096 * Unregisters an encoding alias @alias
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001097 *
Owen Taylor3473f882001-02-23 17:55:21 +00001098 * Returns 0 in case of success, -1 in case of error
1099 */
1100int
1101xmlDelEncodingAlias(const char *alias) {
1102 int i;
1103
1104 if (alias == NULL)
1105 return(-1);
1106
1107 if (xmlCharEncodingAliases == NULL)
1108 return(-1);
1109 /*
1110 * Walk down the list looking for a definition of the alias
1111 */
1112 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1113 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1114 xmlFree((char *) xmlCharEncodingAliases[i].name);
1115 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1116 xmlCharEncodingAliasesNb--;
1117 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1118 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1119 return(0);
1120 }
1121 }
1122 return(-1);
1123}
1124
1125/**
1126 * xmlParseCharEncoding:
1127 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1128 *
William M. Brackf9415e42003-11-28 09:39:10 +00001129 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001130 * that the comparison is case insensitive accordingly to the section
1131 * [XML] 4.3.3 Character Encoding in Entities.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001132 *
Owen Taylor3473f882001-02-23 17:55:21 +00001133 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1134 * if not recognized.
1135 */
1136xmlCharEncoding
1137xmlParseCharEncoding(const char* name)
1138{
1139 const char *alias;
1140 char upper[500];
1141 int i;
1142
1143 if (name == NULL)
1144 return(XML_CHAR_ENCODING_NONE);
1145
1146 /*
1147 * Do the alias resolution
1148 */
1149 alias = xmlGetEncodingAlias(name);
1150 if (alias != NULL)
1151 name = alias;
1152
1153 for (i = 0;i < 499;i++) {
1154 upper[i] = toupper(name[i]);
1155 if (upper[i] == 0) break;
1156 }
1157 upper[i] = 0;
1158
1159 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1160 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1161 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1162
1163 /*
1164 * NOTE: if we were able to parse this, the endianness of UTF16 is
1165 * already found and in use
1166 */
1167 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1168 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001169
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1171 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1172 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1173
1174 /*
1175 * NOTE: if we were able to parse this, the endianness of UCS4 is
1176 * already found and in use
1177 */
1178 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1179 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1180 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1181
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001182
Owen Taylor3473f882001-02-23 17:55:21 +00001183 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1184 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1185 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1186
1187 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1188 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1189 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1190
1191 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1192 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1193 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1194 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1195 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1196 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1197 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1198
1199 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1200 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1201 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1202
1203#ifdef DEBUG_ENCODING
1204 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1205#endif
1206 return(XML_CHAR_ENCODING_ERROR);
1207}
1208
1209/**
1210 * xmlGetCharEncodingName:
1211 * @enc: the encoding
1212 *
1213 * The "canonical" name for XML encoding.
1214 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1215 * Section 4.3.3 Character Encoding in Entities
1216 *
1217 * Returns the canonical name for the given encoding
1218 */
1219
1220const char*
1221xmlGetCharEncodingName(xmlCharEncoding enc) {
1222 switch (enc) {
1223 case XML_CHAR_ENCODING_ERROR:
1224 return(NULL);
1225 case XML_CHAR_ENCODING_NONE:
1226 return(NULL);
1227 case XML_CHAR_ENCODING_UTF8:
1228 return("UTF-8");
1229 case XML_CHAR_ENCODING_UTF16LE:
1230 return("UTF-16");
1231 case XML_CHAR_ENCODING_UTF16BE:
1232 return("UTF-16");
1233 case XML_CHAR_ENCODING_EBCDIC:
1234 return("EBCDIC");
1235 case XML_CHAR_ENCODING_UCS4LE:
1236 return("ISO-10646-UCS-4");
1237 case XML_CHAR_ENCODING_UCS4BE:
1238 return("ISO-10646-UCS-4");
1239 case XML_CHAR_ENCODING_UCS4_2143:
1240 return("ISO-10646-UCS-4");
1241 case XML_CHAR_ENCODING_UCS4_3412:
1242 return("ISO-10646-UCS-4");
1243 case XML_CHAR_ENCODING_UCS2:
1244 return("ISO-10646-UCS-2");
1245 case XML_CHAR_ENCODING_8859_1:
1246 return("ISO-8859-1");
1247 case XML_CHAR_ENCODING_8859_2:
1248 return("ISO-8859-2");
1249 case XML_CHAR_ENCODING_8859_3:
1250 return("ISO-8859-3");
1251 case XML_CHAR_ENCODING_8859_4:
1252 return("ISO-8859-4");
1253 case XML_CHAR_ENCODING_8859_5:
1254 return("ISO-8859-5");
1255 case XML_CHAR_ENCODING_8859_6:
1256 return("ISO-8859-6");
1257 case XML_CHAR_ENCODING_8859_7:
1258 return("ISO-8859-7");
1259 case XML_CHAR_ENCODING_8859_8:
1260 return("ISO-8859-8");
1261 case XML_CHAR_ENCODING_8859_9:
1262 return("ISO-8859-9");
1263 case XML_CHAR_ENCODING_2022_JP:
1264 return("ISO-2022-JP");
1265 case XML_CHAR_ENCODING_SHIFT_JIS:
1266 return("Shift-JIS");
1267 case XML_CHAR_ENCODING_EUC_JP:
1268 return("EUC-JP");
1269 case XML_CHAR_ENCODING_ASCII:
1270 return(NULL);
1271 }
1272 return(NULL);
1273}
1274
Daniel Veillard97ac1312001-05-30 19:14:17 +00001275/************************************************************************
1276 * *
1277 * Char encoding handlers *
1278 * *
1279 ************************************************************************/
1280
Owen Taylor3473f882001-02-23 17:55:21 +00001281
1282/* the size should be growable, but it's not a big deal ... */
1283#define MAX_ENCODING_HANDLERS 50
1284static xmlCharEncodingHandlerPtr *handlers = NULL;
1285static int nbCharEncodingHandler = 0;
1286
1287/*
1288 * The default is UTF-8 for XML, that's also the default used for the
1289 * parser internals, so the default encoding handler is NULL
1290 */
1291
1292static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1293
1294/**
1295 * xmlNewCharEncodingHandler:
1296 * @name: the encoding name, in UTF-8 format (ASCII actually)
1297 * @input: the xmlCharEncodingInputFunc to read that encoding
1298 * @output: the xmlCharEncodingOutputFunc to write that encoding
1299 *
1300 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001301 *
Owen Taylor3473f882001-02-23 17:55:21 +00001302 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1303 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001304xmlCharEncodingHandlerPtr
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001305xmlNewCharEncodingHandler(const char *name,
Owen Taylor3473f882001-02-23 17:55:21 +00001306 xmlCharEncodingInputFunc input,
1307 xmlCharEncodingOutputFunc output) {
1308 xmlCharEncodingHandlerPtr handler;
1309 const char *alias;
1310 char upper[500];
1311 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001312 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001313
1314 /*
1315 * Do the alias resolution
1316 */
1317 alias = xmlGetEncodingAlias(name);
1318 if (alias != NULL)
1319 name = alias;
1320
1321 /*
1322 * Keep only the uppercase version of the encoding.
1323 */
1324 if (name == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001325 xmlEncodingErr(XML_I18N_NO_NAME,
1326 "xmlNewCharEncodingHandler : no name !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001327 return(NULL);
1328 }
1329 for (i = 0;i < 499;i++) {
1330 upper[i] = toupper(name[i]);
1331 if (upper[i] == 0) break;
1332 }
1333 upper[i] = 0;
1334 up = xmlMemStrdup(upper);
1335 if (up == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001336 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001337 return(NULL);
1338 }
1339
1340 /*
1341 * allocate and fill-up an handler block.
1342 */
1343 handler = (xmlCharEncodingHandlerPtr)
1344 xmlMalloc(sizeof(xmlCharEncodingHandler));
1345 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001346 xmlFree(up);
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001347 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001348 return(NULL);
1349 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001350 memset(handler, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001351 handler->input = input;
1352 handler->output = output;
1353 handler->name = up;
1354
1355#ifdef LIBXML_ICONV_ENABLED
1356 handler->iconv_in = NULL;
1357 handler->iconv_out = NULL;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001358#endif
1359#ifdef LIBXML_ICU_ENABLED
1360 handler->uconv_in = NULL;
1361 handler->uconv_out = NULL;
1362#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001363
1364 /*
1365 * registers and returns the handler.
1366 */
1367 xmlRegisterCharEncodingHandler(handler);
1368#ifdef DEBUG_ENCODING
1369 xmlGenericError(xmlGenericErrorContext,
1370 "Registered encoding handler for %s\n", name);
1371#endif
1372 return(handler);
1373}
1374
1375/**
1376 * xmlInitCharEncodingHandlers:
1377 *
1378 * Initialize the char encoding support, it registers the default
1379 * encoding supported.
1380 * NOTE: while public, this function usually doesn't need to be called
1381 * in normal processing.
1382 */
1383void
1384xmlInitCharEncodingHandlers(void) {
1385 unsigned short int tst = 0x1234;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001386 unsigned char *ptr = (unsigned char *) &tst;
Owen Taylor3473f882001-02-23 17:55:21 +00001387
1388 if (handlers != NULL) return;
1389
1390 handlers = (xmlCharEncodingHandlerPtr *)
1391 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1392
1393 if (*ptr == 0x12) xmlLittleEndian = 0;
1394 else if (*ptr == 0x34) xmlLittleEndian = 1;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001395 else {
1396 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1397 "Odd problem at endianness detection\n", NULL);
1398 }
Owen Taylor3473f882001-02-23 17:55:21 +00001399
1400 if (handlers == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001401 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001402 return;
1403 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001404 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001405#ifdef LIBXML_OUTPUT_ENABLED
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001406 xmlUTF16LEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001407 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001408 xmlUTF16BEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001409 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001410 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001411 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1412 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001413 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001414#ifdef LIBXML_HTML_ENABLED
1415 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1416#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001417#else
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001418 xmlUTF16LEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001419 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001420 xmlUTF16BEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001421 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001422 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001423 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1424 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1425 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1426#endif /* LIBXML_OUTPUT_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001427#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001428#ifdef LIBXML_ISO8859X_ENABLED
1429 xmlRegisterCharEncodingHandlersISO8859x ();
1430#endif
1431#endif
1432
Owen Taylor3473f882001-02-23 17:55:21 +00001433}
1434
1435/**
1436 * xmlCleanupCharEncodingHandlers:
1437 *
1438 * Cleanup the memory allocated for the char encoding support, it
1439 * unregisters all the encoding handlers and the aliases.
1440 */
1441void
1442xmlCleanupCharEncodingHandlers(void) {
1443 xmlCleanupEncodingAliases();
1444
1445 if (handlers == NULL) return;
1446
1447 for (;nbCharEncodingHandler > 0;) {
1448 nbCharEncodingHandler--;
1449 if (handlers[nbCharEncodingHandler] != NULL) {
1450 if (handlers[nbCharEncodingHandler]->name != NULL)
1451 xmlFree(handlers[nbCharEncodingHandler]->name);
1452 xmlFree(handlers[nbCharEncodingHandler]);
1453 }
1454 }
1455 xmlFree(handlers);
1456 handlers = NULL;
1457 nbCharEncodingHandler = 0;
1458 xmlDefaultCharEncodingHandler = NULL;
1459}
1460
1461/**
1462 * xmlRegisterCharEncodingHandler:
1463 * @handler: the xmlCharEncodingHandlerPtr handler block
1464 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001465 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001466 */
1467void
1468xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1469 if (handlers == NULL) xmlInitCharEncodingHandlers();
Daniel Veillard76d36452009-09-07 11:19:33 +02001470 if ((handler == NULL) || (handlers == NULL)) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001471 xmlEncodingErr(XML_I18N_NO_HANDLER,
1472 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001473 return;
1474 }
1475
1476 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001477 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1478 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1479 "MAX_ENCODING_HANDLERS");
Owen Taylor3473f882001-02-23 17:55:21 +00001480 return;
1481 }
1482 handlers[nbCharEncodingHandler++] = handler;
1483}
1484
1485/**
1486 * xmlGetCharEncodingHandler:
1487 * @enc: an xmlCharEncoding value.
1488 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001489 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001490 *
1491 * Returns the handler or NULL if not found
1492 */
1493xmlCharEncodingHandlerPtr
1494xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1495 xmlCharEncodingHandlerPtr handler;
1496
1497 if (handlers == NULL) xmlInitCharEncodingHandlers();
1498 switch (enc) {
1499 case XML_CHAR_ENCODING_ERROR:
1500 return(NULL);
1501 case XML_CHAR_ENCODING_NONE:
1502 return(NULL);
1503 case XML_CHAR_ENCODING_UTF8:
1504 return(NULL);
1505 case XML_CHAR_ENCODING_UTF16LE:
1506 return(xmlUTF16LEHandler);
1507 case XML_CHAR_ENCODING_UTF16BE:
1508 return(xmlUTF16BEHandler);
1509 case XML_CHAR_ENCODING_EBCDIC:
1510 handler = xmlFindCharEncodingHandler("EBCDIC");
1511 if (handler != NULL) return(handler);
1512 handler = xmlFindCharEncodingHandler("ebcdic");
1513 if (handler != NULL) return(handler);
Martin Köglerc78988a2009-08-24 16:47:48 +02001514 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1515 if (handler != NULL) return(handler);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 break;
1517 case XML_CHAR_ENCODING_UCS4BE:
1518 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1519 if (handler != NULL) return(handler);
1520 handler = xmlFindCharEncodingHandler("UCS-4");
1521 if (handler != NULL) return(handler);
1522 handler = xmlFindCharEncodingHandler("UCS4");
1523 if (handler != NULL) return(handler);
1524 break;
1525 case XML_CHAR_ENCODING_UCS4LE:
1526 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1527 if (handler != NULL) return(handler);
1528 handler = xmlFindCharEncodingHandler("UCS-4");
1529 if (handler != NULL) return(handler);
1530 handler = xmlFindCharEncodingHandler("UCS4");
1531 if (handler != NULL) return(handler);
1532 break;
1533 case XML_CHAR_ENCODING_UCS4_2143:
1534 break;
1535 case XML_CHAR_ENCODING_UCS4_3412:
1536 break;
1537 case XML_CHAR_ENCODING_UCS2:
1538 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1539 if (handler != NULL) return(handler);
1540 handler = xmlFindCharEncodingHandler("UCS-2");
1541 if (handler != NULL) return(handler);
1542 handler = xmlFindCharEncodingHandler("UCS2");
1543 if (handler != NULL) return(handler);
1544 break;
1545
1546 /*
1547 * We used to keep ISO Latin encodings native in the
1548 * generated data. This led to so many problems that
1549 * this has been removed. One can still change this
1550 * back by registering no-ops encoders for those
1551 */
1552 case XML_CHAR_ENCODING_8859_1:
1553 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1554 if (handler != NULL) return(handler);
1555 break;
1556 case XML_CHAR_ENCODING_8859_2:
1557 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1558 if (handler != NULL) return(handler);
1559 break;
1560 case XML_CHAR_ENCODING_8859_3:
1561 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1562 if (handler != NULL) return(handler);
1563 break;
1564 case XML_CHAR_ENCODING_8859_4:
1565 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1566 if (handler != NULL) return(handler);
1567 break;
1568 case XML_CHAR_ENCODING_8859_5:
1569 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1570 if (handler != NULL) return(handler);
1571 break;
1572 case XML_CHAR_ENCODING_8859_6:
1573 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1574 if (handler != NULL) return(handler);
1575 break;
1576 case XML_CHAR_ENCODING_8859_7:
1577 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1578 if (handler != NULL) return(handler);
1579 break;
1580 case XML_CHAR_ENCODING_8859_8:
1581 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1582 if (handler != NULL) return(handler);
1583 break;
1584 case XML_CHAR_ENCODING_8859_9:
1585 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1586 if (handler != NULL) return(handler);
1587 break;
1588
1589
1590 case XML_CHAR_ENCODING_2022_JP:
1591 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1592 if (handler != NULL) return(handler);
1593 break;
1594 case XML_CHAR_ENCODING_SHIFT_JIS:
1595 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1596 if (handler != NULL) return(handler);
1597 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1598 if (handler != NULL) return(handler);
1599 handler = xmlFindCharEncodingHandler("Shift_JIS");
1600 if (handler != NULL) return(handler);
1601 break;
1602 case XML_CHAR_ENCODING_EUC_JP:
1603 handler = xmlFindCharEncodingHandler("EUC-JP");
1604 if (handler != NULL) return(handler);
1605 break;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001606 default:
Owen Taylor3473f882001-02-23 17:55:21 +00001607 break;
1608 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001609
Owen Taylor3473f882001-02-23 17:55:21 +00001610#ifdef DEBUG_ENCODING
1611 xmlGenericError(xmlGenericErrorContext,
1612 "No handler found for encoding %d\n", enc);
1613#endif
1614 return(NULL);
1615}
1616
1617/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001618 * xmlFindCharEncodingHandler:
1619 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001620 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001621 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001622 *
1623 * Returns the handler or NULL if not found
1624 */
1625xmlCharEncodingHandlerPtr
1626xmlFindCharEncodingHandler(const char *name) {
1627 const char *nalias;
1628 const char *norig;
1629 xmlCharEncoding alias;
1630#ifdef LIBXML_ICONV_ENABLED
1631 xmlCharEncodingHandlerPtr enc;
1632 iconv_t icv_in, icv_out;
1633#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001634#ifdef LIBXML_ICU_ENABLED
1635 xmlCharEncodingHandlerPtr encu;
1636 uconv_t *ucv_in, *ucv_out;
1637#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001638 char upper[100];
1639 int i;
1640
1641 if (handlers == NULL) xmlInitCharEncodingHandlers();
1642 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1643 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1644
1645 /*
1646 * Do the alias resolution
1647 */
1648 norig = name;
1649 nalias = xmlGetEncodingAlias(name);
1650 if (nalias != NULL)
1651 name = nalias;
1652
1653 /*
1654 * Check first for directly registered encoding names
1655 */
1656 for (i = 0;i < 99;i++) {
1657 upper[i] = toupper(name[i]);
1658 if (upper[i] == 0) break;
1659 }
1660 upper[i] = 0;
1661
Daniel Veillardd44b9362009-09-07 12:15:08 +02001662 if (handlers != NULL) {
1663 for (i = 0;i < nbCharEncodingHandler; i++) {
1664 if (!strcmp(upper, handlers[i]->name)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001665#ifdef DEBUG_ENCODING
Daniel Veillardd44b9362009-09-07 12:15:08 +02001666 xmlGenericError(xmlGenericErrorContext,
1667 "Found registered handler for encoding %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001668#endif
Daniel Veillardd44b9362009-09-07 12:15:08 +02001669 return(handlers[i]);
1670 }
1671 }
1672 }
Owen Taylor3473f882001-02-23 17:55:21 +00001673
1674#ifdef LIBXML_ICONV_ENABLED
1675 /* check whether iconv can handle this */
1676 icv_in = iconv_open("UTF-8", name);
1677 icv_out = iconv_open(name, "UTF-8");
Daniel Veillard28aac0b2006-10-16 08:31:18 +00001678 if (icv_in == (iconv_t) -1) {
1679 icv_in = iconv_open("UTF-8", upper);
1680 }
1681 if (icv_out == (iconv_t) -1) {
1682 icv_out = iconv_open(upper, "UTF-8");
1683 }
Owen Taylor3473f882001-02-23 17:55:21 +00001684 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1685 enc = (xmlCharEncodingHandlerPtr)
1686 xmlMalloc(sizeof(xmlCharEncodingHandler));
1687 if (enc == NULL) {
1688 iconv_close(icv_in);
1689 iconv_close(icv_out);
1690 return(NULL);
1691 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001692 memset(enc, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001693 enc->name = xmlMemStrdup(name);
1694 enc->input = NULL;
1695 enc->output = NULL;
1696 enc->iconv_in = icv_in;
1697 enc->iconv_out = icv_out;
1698#ifdef DEBUG_ENCODING
1699 xmlGenericError(xmlGenericErrorContext,
1700 "Found iconv handler for encoding %s\n", name);
1701#endif
1702 return enc;
1703 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001704 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00001705 "iconv : problems with filters for '%s'\n", name);
1706 }
1707#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001708#ifdef LIBXML_ICU_ENABLED
1709 /* check whether icu can handle this */
1710 ucv_in = openIcuConverter(name, 1);
1711 ucv_out = openIcuConverter(name, 0);
1712 if (ucv_in != NULL && ucv_out != NULL) {
1713 encu = (xmlCharEncodingHandlerPtr)
1714 xmlMalloc(sizeof(xmlCharEncodingHandler));
1715 if (encu == NULL) {
1716 closeIcuConverter(ucv_in);
1717 closeIcuConverter(ucv_out);
1718 return(NULL);
1719 }
1720 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1721 encu->name = xmlMemStrdup(name);
1722 encu->input = NULL;
1723 encu->output = NULL;
1724 encu->uconv_in = ucv_in;
1725 encu->uconv_out = ucv_out;
1726#ifdef DEBUG_ENCODING
1727 xmlGenericError(xmlGenericErrorContext,
1728 "Found ICU converter handler for encoding %s\n", name);
1729#endif
1730 return encu;
1731 } else if (ucv_in != NULL || ucv_out != NULL) {
1732 closeIcuConverter(ucv_in);
1733 closeIcuConverter(ucv_out);
1734 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1735 "ICU converter : problems with filters for '%s'\n", name);
1736 }
1737#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001738
1739#ifdef DEBUG_ENCODING
1740 xmlGenericError(xmlGenericErrorContext,
1741 "No handler found for encoding %s\n", name);
1742#endif
1743
1744 /*
1745 * Fallback using the canonical names
1746 */
1747 alias = xmlParseCharEncoding(norig);
1748 if (alias != XML_CHAR_ENCODING_ERROR) {
1749 const char* canon;
1750 canon = xmlGetCharEncodingName(alias);
1751 if ((canon != NULL) && (strcmp(name, canon))) {
1752 return(xmlFindCharEncodingHandler(canon));
1753 }
1754 }
1755
William M. Brackf9415e42003-11-28 09:39:10 +00001756 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001757 return(NULL);
1758}
1759
Daniel Veillard97ac1312001-05-30 19:14:17 +00001760/************************************************************************
1761 * *
1762 * ICONV based generic conversion functions *
1763 * *
1764 ************************************************************************/
1765
Owen Taylor3473f882001-02-23 17:55:21 +00001766#ifdef LIBXML_ICONV_ENABLED
1767/**
1768 * xmlIconvWrapper:
1769 * @cd: iconv converter data structure
1770 * @out: a pointer to an array of bytes to store the result
1771 * @outlen: the length of @out
1772 * @in: a pointer to an array of ISO Latin 1 chars
1773 * @inlen: the length of @in
1774 *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001775 * Returns 0 if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001776 * -1 by lack of space, or
1777 * -2 if the transcoding fails (for *in is not valid utf8 string or
1778 * the result of transformation can't fit into the encoding we want), or
1779 * -3 if there the last byte can't form a single output char.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001780 *
Owen Taylor3473f882001-02-23 17:55:21 +00001781 * The value of @inlen after return is the number of octets consumed
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001782 * as the return value is positive, else unpredictable.
Owen Taylor3473f882001-02-23 17:55:21 +00001783 * The value of @outlen after return is the number of ocetes consumed.
1784 */
1785static int
Daniel Veillardce682bc2004-11-05 17:22:25 +00001786xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1787 const unsigned char *in, int *inlen) {
1788 size_t icv_inlen, icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001789 const char *icv_in = (const char *) in;
1790 char *icv_out = (char *) out;
1791 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001792
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001793 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1794 if (outlen != NULL) *outlen = 0;
Daniel Veillardce682bc2004-11-05 17:22:25 +00001795 return(-1);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001796 }
Daniel Veillardce682bc2004-11-05 17:22:25 +00001797 icv_inlen = *inlen;
1798 icv_outlen = *outlen;
Daniel Veillard8e1a46d2008-02-15 07:47:26 +00001799 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
Daniel Veillard2728f842006-03-09 16:49:24 +00001800 *inlen -= icv_inlen;
1801 *outlen -= icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001802 if ((icv_inlen != 0) || (ret == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001803#ifdef EILSEQ
Daniel Veillard9403a042001-05-28 11:00:53 +00001804 if (errno == EILSEQ) {
1805 return -2;
1806 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001807#endif
1808#ifdef E2BIG
Daniel Veillard9403a042001-05-28 11:00:53 +00001809 if (errno == E2BIG) {
1810 return -1;
1811 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001812#endif
1813#ifdef EINVAL
Daniel Veillard9403a042001-05-28 11:00:53 +00001814 if (errno == EINVAL) {
1815 return -3;
1816 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001817#endif
Daniel Veillard9403a042001-05-28 11:00:53 +00001818 {
1819 return -3;
1820 }
1821 }
1822 return 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001823}
1824#endif /* LIBXML_ICONV_ENABLED */
1825
Daniel Veillard97ac1312001-05-30 19:14:17 +00001826/************************************************************************
1827 * *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001828 * ICU based generic conversion functions *
1829 * *
1830 ************************************************************************/
1831
1832#ifdef LIBXML_ICU_ENABLED
1833/**
1834 * xmlUconvWrapper:
1835 * @cd: ICU uconverter data structure
1836 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1837 * @out: a pointer to an array of bytes to store the result
1838 * @outlen: the length of @out
1839 * @in: a pointer to an array of ISO Latin 1 chars
1840 * @inlen: the length of @in
1841 *
1842 * Returns 0 if success, or
1843 * -1 by lack of space, or
1844 * -2 if the transcoding fails (for *in is not valid utf8 string or
1845 * the result of transformation can't fit into the encoding we want), or
1846 * -3 if there the last byte can't form a single output char.
1847 *
1848 * The value of @inlen after return is the number of octets consumed
1849 * as the return value is positive, else unpredictable.
1850 * The value of @outlen after return is the number of ocetes consumed.
1851 */
1852static int
1853xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1854 const unsigned char *in, int *inlen) {
1855 const char *ucv_in = (const char *) in;
1856 char *ucv_out = (char *) out;
1857 UErrorCode err = U_ZERO_ERROR;
1858
1859 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1860 if (outlen != NULL) *outlen = 0;
1861 return(-1);
1862 }
1863
1864 /*
1865 * TODO(jungshik)
1866 * 1. is ucnv_convert(To|From)Algorithmic better?
1867 * 2. had we better use an explicit pivot buffer?
1868 * 3. error returned comes from 'fromUnicode' only even
1869 * when toUnicode is true !
1870 */
1871 if (toUnicode) {
1872 /* encoding => UTF-16 => UTF-8 */
1873 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1874 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1875 0, TRUE, &err);
1876 } else {
1877 /* UTF-8 => UTF-16 => encoding */
1878 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1879 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1880 0, TRUE, &err);
1881 }
1882 *inlen = ucv_in - (const char*) in;
1883 *outlen = ucv_out - (char *) out;
1884 if (U_SUCCESS(err))
1885 return 0;
1886 if (err == U_BUFFER_OVERFLOW_ERROR)
1887 return -1;
1888 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1889 return -2;
1890 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1891 return -3;
1892}
1893#endif /* LIBXML_ICU_ENABLED */
1894
1895/************************************************************************
1896 * *
Daniel Veillard97ac1312001-05-30 19:14:17 +00001897 * The real API used by libxml for on-the-fly conversion *
1898 * *
1899 ************************************************************************/
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001900int
1901xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1902 xmlBufferPtr in, int len);
Daniel Veillard97ac1312001-05-30 19:14:17 +00001903
Owen Taylor3473f882001-02-23 17:55:21 +00001904/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001905 * xmlCharEncFirstLineInt:
Owen Taylor3473f882001-02-23 17:55:21 +00001906 * @handler: char enconding transformation data structure
1907 * @out: an xmlBuffer for the output.
1908 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001909 * @len: number of bytes to convert for the first line, or -1
1910 *
Owen Taylor3473f882001-02-23 17:55:21 +00001911 * Front-end for the encoding handler input function, but handle only
1912 * the very first line, i.e. limit itself to 45 chars.
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001913 *
1914 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001915 * -1 general error
1916 * -2 if the transcoding fails (for *in is not valid utf8 string or
1917 * the result of transformation can't fit into the encoding we want), or
1918 */
1919int
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001920xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1921 xmlBufferPtr in, int len) {
Owen Taylor3473f882001-02-23 17:55:21 +00001922 int ret = -2;
1923 int written;
1924 int toconv;
1925
1926 if (handler == NULL) return(-1);
1927 if (out == NULL) return(-1);
1928 if (in == NULL) return(-1);
1929
William M. Brack38d452a2007-05-22 16:00:06 +00001930 /* calculate space available */
Daniel Veillard69f04562011-08-19 11:05:04 +08001931 written = out->size - out->use - 1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00001932 toconv = in->use;
Owen Taylor3473f882001-02-23 17:55:21 +00001933 /*
1934 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1935 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001936 * declaration without going too far inside the document content.
Daniel Veillard57c9db02008-03-06 14:37:10 +00001937 * on UTF-16 this means 90bytes, on UCS4 this means 180
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001938 * The actual value depending on guessed encoding is passed as @len
1939 * if provided
Owen Taylor3473f882001-02-23 17:55:21 +00001940 */
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001941 if (len >= 0) {
1942 if (toconv > len)
1943 toconv = len;
1944 } else {
1945 if (toconv > 180)
1946 toconv = 180;
1947 }
William M. Brack38d452a2007-05-22 16:00:06 +00001948 if (toconv * 2 >= written) {
1949 xmlBufferGrow(out, toconv);
1950 written = out->size - out->use - 1;
1951 }
Owen Taylor3473f882001-02-23 17:55:21 +00001952
1953 if (handler->input != NULL) {
1954 ret = handler->input(&out->content[out->use], &written,
1955 in->content, &toconv);
1956 xmlBufferShrink(in, toconv);
1957 out->use += written;
1958 out->content[out->use] = 0;
1959 }
1960#ifdef LIBXML_ICONV_ENABLED
1961 else if (handler->iconv_in != NULL) {
1962 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1963 &written, in->content, &toconv);
1964 xmlBufferShrink(in, toconv);
1965 out->use += written;
1966 out->content[out->use] = 0;
1967 if (ret == -1) ret = -3;
1968 }
1969#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001970#ifdef LIBXML_ICU_ENABLED
1971 else if (handler->uconv_in != NULL) {
1972 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1973 &written, in->content, &toconv);
1974 xmlBufferShrink(in, toconv);
1975 out->use += written;
1976 out->content[out->use] = 0;
1977 if (ret == -1) ret = -3;
1978 }
1979#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001980#ifdef DEBUG_ENCODING
1981 switch (ret) {
1982 case 0:
1983 xmlGenericError(xmlGenericErrorContext,
1984 "converted %d bytes to %d bytes of input\n",
1985 toconv, written);
1986 break;
1987 case -1:
1988 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1989 toconv, written, in->use);
1990 break;
1991 case -2:
1992 xmlGenericError(xmlGenericErrorContext,
1993 "input conversion failed due to input error\n");
1994 break;
1995 case -3:
1996 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1997 toconv, written, in->use);
1998 break;
1999 default:
2000 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2001 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002002#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002003 /*
2004 * Ignore when input buffer is not on a boundary
2005 */
2006 if (ret == -3) ret = 0;
2007 if (ret == -1) ret = 0;
2008 return(ret);
2009}
2010
2011/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002012 * xmlCharEncFirstLine:
2013 * @handler: char enconding transformation data structure
2014 * @out: an xmlBuffer for the output.
2015 * @in: an xmlBuffer for the input
2016 *
2017 * Front-end for the encoding handler input function, but handle only
2018 * the very first line, i.e. limit itself to 45 chars.
2019 *
2020 * Returns the number of byte written if success, or
2021 * -1 general error
2022 * -2 if the transcoding fails (for *in is not valid utf8 string or
2023 * the result of transformation can't fit into the encoding we want), or
2024 */
2025int
2026xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2027 xmlBufferPtr in) {
2028 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2029}
2030
2031/**
Owen Taylor3473f882001-02-23 17:55:21 +00002032 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002033 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002034 * @out: an xmlBuffer for the output.
2035 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002036 *
Owen Taylor3473f882001-02-23 17:55:21 +00002037 * Generic front-end for the encoding handler input function
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002038 *
2039 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002040 * -1 general error
2041 * -2 if the transcoding fails (for *in is not valid utf8 string or
2042 * the result of transformation can't fit into the encoding we want), or
2043 */
2044int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002045xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2046 xmlBufferPtr in)
2047{
Owen Taylor3473f882001-02-23 17:55:21 +00002048 int ret = -2;
2049 int written;
2050 int toconv;
2051
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002052 if (handler == NULL)
2053 return (-1);
2054 if (out == NULL)
2055 return (-1);
2056 if (in == NULL)
2057 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002058
2059 toconv = in->use;
2060 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002061 return (0);
Daniel Veillard69f04562011-08-19 11:05:04 +08002062 written = out->size - out->use -1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00002063 if (toconv * 2 >= written) {
2064 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002065 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002066 }
2067 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002068 ret = handler->input(&out->content[out->use], &written,
2069 in->content, &toconv);
2070 xmlBufferShrink(in, toconv);
2071 out->use += written;
2072 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002073 }
2074#ifdef LIBXML_ICONV_ENABLED
2075 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002076 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2077 &written, in->content, &toconv);
2078 xmlBufferShrink(in, toconv);
2079 out->use += written;
2080 out->content[out->use] = 0;
2081 if (ret == -1)
2082 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002083 }
2084#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002085#ifdef LIBXML_ICU_ENABLED
2086 else if (handler->uconv_in != NULL) {
2087 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2088 &written, in->content, &toconv);
2089 xmlBufferShrink(in, toconv);
2090 out->use += written;
2091 out->content[out->use] = 0;
2092 if (ret == -1)
2093 ret = -3;
2094 }
2095#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002096 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002097 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002098#ifdef DEBUG_ENCODING
2099 xmlGenericError(xmlGenericErrorContext,
2100 "converted %d bytes to %d bytes of input\n",
2101 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002102#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002103 break;
2104 case -1:
2105#ifdef DEBUG_ENCODING
2106 xmlGenericError(xmlGenericErrorContext,
2107 "converted %d bytes to %d bytes of input, %d left\n",
2108 toconv, written, in->use);
2109#endif
2110 break;
2111 case -3:
2112#ifdef DEBUG_ENCODING
2113 xmlGenericError(xmlGenericErrorContext,
2114 "converted %d bytes to %d bytes of input, %d left\n",
2115 toconv, written, in->use);
2116#endif
2117 break;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002118 case -2: {
2119 char buf[50];
2120
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002121 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002122 in->content[0], in->content[1],
2123 in->content[2], in->content[3]);
2124 buf[49] = 0;
2125 xmlEncodingErr(XML_I18N_CONV_FAILED,
2126 "input conversion failed due to input error, bytes %s\n",
2127 buf);
2128 }
Owen Taylor3473f882001-02-23 17:55:21 +00002129 }
2130 /*
2131 * Ignore when input buffer is not on a boundary
2132 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002133 if (ret == -3)
2134 ret = 0;
Daniel Veillard2644ab22005-08-24 14:22:55 +00002135 return (written? written : ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002136}
2137
2138/**
2139 * xmlCharEncOutFunc:
2140 * @handler: char enconding transformation data structure
2141 * @out: an xmlBuffer for the output.
2142 * @in: an xmlBuffer for the input
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002143 *
Owen Taylor3473f882001-02-23 17:55:21 +00002144 * Generic front-end for the encoding handler output function
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002145 * a first call with @in == NULL has to be made firs to initiate the
Owen Taylor3473f882001-02-23 17:55:21 +00002146 * output in case of non-stateless encoding needing to initiate their
2147 * state or the output (like the BOM in UTF16).
2148 * In case of UTF8 sequence conversion errors for the given encoder,
2149 * the content will be automatically remapped to a CharRef sequence.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002150 *
2151 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002152 * -1 general error
2153 * -2 if the transcoding fails (for *in is not valid utf8 string or
2154 * the result of transformation can't fit into the encoding we want), or
2155 */
2156int
2157xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2158 xmlBufferPtr in) {
2159 int ret = -2;
2160 int written;
2161 int writtentot = 0;
2162 int toconv;
2163 int output = 0;
2164
2165 if (handler == NULL) return(-1);
2166 if (out == NULL) return(-1);
2167
2168retry:
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002169
Owen Taylor3473f882001-02-23 17:55:21 +00002170 written = out->size - out->use;
2171
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002172 if (written > 0)
2173 written--; /* Gennady: count '/0' */
2174
Owen Taylor3473f882001-02-23 17:55:21 +00002175 /*
2176 * First specific handling of in = NULL, i.e. the initialization call
2177 */
2178 if (in == NULL) {
2179 toconv = 0;
2180 if (handler->output != NULL) {
2181 ret = handler->output(&out->content[out->use], &written,
2182 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002183 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002184 out->use += written;
2185 out->content[out->use] = 0;
2186 }
Owen Taylor3473f882001-02-23 17:55:21 +00002187 }
2188#ifdef LIBXML_ICONV_ENABLED
2189 else if (handler->iconv_out != NULL) {
2190 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2191 &written, NULL, &toconv);
2192 out->use += written;
2193 out->content[out->use] = 0;
2194 }
2195#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002196#ifdef LIBXML_ICU_ENABLED
2197 else if (handler->uconv_out != NULL) {
2198 ret = xmlUconvWrapper(handler->uconv_out, 0,
2199 &out->content[out->use],
2200 &written, NULL, &toconv);
2201 out->use += written;
2202 out->content[out->use] = 0;
2203 }
2204#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002205#ifdef DEBUG_ENCODING
2206 xmlGenericError(xmlGenericErrorContext,
2207 "initialized encoder\n");
2208#endif
2209 return(0);
2210 }
2211
2212 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002213 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002214 */
2215 toconv = in->use;
2216 if (toconv == 0)
2217 return(0);
Daniel Veillardf1245392008-04-03 09:46:34 +00002218 if (toconv * 4 >= written) {
2219 xmlBufferGrow(out, toconv * 4);
Owen Taylor3473f882001-02-23 17:55:21 +00002220 written = out->size - out->use - 1;
2221 }
2222 if (handler->output != NULL) {
2223 ret = handler->output(&out->content[out->use], &written,
2224 in->content, &toconv);
Daniel Veillarde83e93e2008-08-30 12:52:26 +00002225 if (written > 0) {
2226 xmlBufferShrink(in, toconv);
2227 out->use += written;
2228 writtentot += written;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002229 }
Owen Taylor3473f882001-02-23 17:55:21 +00002230 out->content[out->use] = 0;
2231 }
2232#ifdef LIBXML_ICONV_ENABLED
2233 else if (handler->iconv_out != NULL) {
2234 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2235 &written, in->content, &toconv);
2236 xmlBufferShrink(in, toconv);
2237 out->use += written;
2238 writtentot += written;
2239 out->content[out->use] = 0;
2240 if (ret == -1) {
2241 if (written > 0) {
2242 /*
2243 * Can be a limitation of iconv
2244 */
2245 goto retry;
2246 }
2247 ret = -3;
2248 }
2249 }
2250#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002251#ifdef LIBXML_ICU_ENABLED
2252 else if (handler->uconv_out != NULL) {
2253 ret = xmlUconvWrapper(handler->uconv_out, 0,
2254 &out->content[out->use],
2255 &written, in->content, &toconv);
2256 xmlBufferShrink(in, toconv);
2257 out->use += written;
2258 writtentot += written;
2259 out->content[out->use] = 0;
2260 if (ret == -1) {
2261 if (written > 0) {
2262 /*
2263 * Can be a limitation of iconv
2264 */
2265 goto retry;
2266 }
2267 ret = -3;
2268 }
2269 }
2270#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002271 else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002272 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2273 "xmlCharEncOutFunc: no output function !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002274 return(-1);
2275 }
2276
2277 if (ret >= 0) output += ret;
2278
2279 /*
2280 * Attempt to handle error cases
2281 */
2282 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002283 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002284#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002285 xmlGenericError(xmlGenericErrorContext,
2286 "converted %d bytes to %d bytes of output\n",
2287 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002288#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002289 break;
2290 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002291#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002292 xmlGenericError(xmlGenericErrorContext,
2293 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002294#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002295 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002296 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002297#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002298 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2299 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002300#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002301 break;
2302 case -2: {
2303 int len = in->use;
2304 const xmlChar *utf = (const xmlChar *) in->content;
2305 int cur;
2306
2307 cur = xmlGetUTF8Char(utf, &len);
2308 if (cur > 0) {
2309 xmlChar charref[20];
2310
2311#ifdef DEBUG_ENCODING
2312 xmlGenericError(xmlGenericErrorContext,
2313 "handling output conversion error\n");
2314 xmlGenericError(xmlGenericErrorContext,
2315 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2316 in->content[0], in->content[1],
2317 in->content[2], in->content[3]);
2318#endif
2319 /*
2320 * Removes the UTF8 sequence, and replace it by a charref
2321 * and continue the transcoding phase, hoping the error
2322 * did not mangle the encoder state.
2323 */
Daniel Veillard2e7598c2005-09-02 12:28:34 +00002324 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlBufferShrink(in, len);
2326 xmlBufferAddHead(in, charref, -1);
2327
2328 goto retry;
2329 } else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002330 char buf[50];
2331
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002332 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002333 in->content[0], in->content[1],
2334 in->content[2], in->content[3]);
2335 buf[49] = 0;
2336 xmlEncodingErr(XML_I18N_CONV_FAILED,
2337 "output conversion failed due to conv error, bytes %s\n",
2338 buf);
Daniel Veillarddf750622006-05-02 12:24:06 +00002339 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2340 in->content[0] = ' ';
Owen Taylor3473f882001-02-23 17:55:21 +00002341 }
2342 break;
2343 }
2344 }
2345 return(ret);
2346}
2347
2348/**
2349 * xmlCharEncCloseFunc:
2350 * @handler: char enconding transformation data structure
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002351 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002352 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002353 *
2354 * Returns 0 if success, or -1 in case of error
2355 */
2356int
2357xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2358 int ret = 0;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002359 int tofree = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002360 if (handler == NULL) return(-1);
2361 if (handler->name == NULL) return(-1);
2362#ifdef LIBXML_ICONV_ENABLED
2363 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002364 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002365 * and the associated icon resources.
2366 */
2367 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002368 tofree = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002369 if (handler->iconv_out != NULL) {
2370 if (iconv_close(handler->iconv_out))
2371 ret = -1;
2372 handler->iconv_out = NULL;
2373 }
2374 if (handler->iconv_in != NULL) {
2375 if (iconv_close(handler->iconv_in))
2376 ret = -1;
2377 handler->iconv_in = NULL;
2378 }
Owen Taylor3473f882001-02-23 17:55:21 +00002379 }
2380#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002381#ifdef LIBXML_ICU_ENABLED
2382 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2383 tofree = 1;
2384 if (handler->uconv_out != NULL) {
2385 closeIcuConverter(handler->uconv_out);
2386 handler->uconv_out = NULL;
2387 }
2388 if (handler->uconv_in != NULL) {
2389 closeIcuConverter(handler->uconv_in);
2390 handler->uconv_in = NULL;
2391 }
2392 }
2393#endif
2394 if (tofree) {
2395 /* free up only dynamic handlers iconv/uconv */
2396 if (handler->name != NULL)
2397 xmlFree(handler->name);
2398 handler->name = NULL;
2399 xmlFree(handler);
2400 }
Owen Taylor3473f882001-02-23 17:55:21 +00002401#ifdef DEBUG_ENCODING
2402 if (ret)
2403 xmlGenericError(xmlGenericErrorContext,
2404 "failed to close the encoding handler\n");
2405 else
2406 xmlGenericError(xmlGenericErrorContext,
2407 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002408#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002409
Owen Taylor3473f882001-02-23 17:55:21 +00002410 return(ret);
2411}
2412
Daniel Veillard36711902004-02-11 13:25:26 +00002413/**
2414 * xmlByteConsumed:
2415 * @ctxt: an XML parser context
2416 *
2417 * This function provides the current index of the parser relative
2418 * to the start of the current entity. This function is computed in
2419 * bytes from the beginning starting at zero and finishing at the
2420 * size in byte of the file if parsing a file. The function is
2421 * of constant cost if the input is UTF-8 but can be costly if run
2422 * on non-UTF-8 input.
2423 *
2424 * Returns the index in bytes from the beginning of the entity or -1
2425 * in case the index could not be computed.
2426 */
2427long
2428xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2429 xmlParserInputPtr in;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002430
Daniel Veillard36711902004-02-11 13:25:26 +00002431 if (ctxt == NULL) return(-1);
2432 in = ctxt->input;
2433 if (in == NULL) return(-1);
2434 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2435 unsigned int unused = 0;
2436 xmlCharEncodingHandler * handler = in->buf->encoder;
2437 /*
2438 * Encoding conversion, compute the number of unused original
2439 * bytes from the input not consumed and substract that from
2440 * the raw consumed value, this is not a cheap operation
2441 */
2442 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002443 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002444 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002445 int toconv = in->end - in->cur, written = 32000;
2446
2447 int ret;
2448
2449 if (handler->output != NULL) {
2450 do {
2451 toconv = in->end - cur;
2452 written = 32000;
2453 ret = handler->output(&convbuf[0], &written,
2454 cur, &toconv);
2455 if (ret == -1) return(-1);
2456 unused += written;
2457 cur += toconv;
2458 } while (ret == -2);
2459#ifdef LIBXML_ICONV_ENABLED
2460 } else if (handler->iconv_out != NULL) {
2461 do {
2462 toconv = in->end - cur;
2463 written = 32000;
2464 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2465 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002466 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002467 if (written > 0)
2468 ret = -2;
2469 else
2470 return(-1);
2471 }
2472 unused += written;
2473 cur += toconv;
2474 } while (ret == -2);
2475#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002476#ifdef LIBXML_ICU_ENABLED
2477 } else if (handler->uconv_out != NULL) {
2478 do {
2479 toconv = in->end - cur;
2480 written = 32000;
2481 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2482 &written, cur, &toconv);
2483 if (ret < 0) {
2484 if (written > 0)
2485 ret = -2;
2486 else
2487 return(-1);
2488 }
2489 unused += written;
2490 cur += toconv;
2491 } while (ret == -2);
2492#endif
Daniel Veillard36711902004-02-11 13:25:26 +00002493 } else {
2494 /* could not find a converter */
2495 return(-1);
2496 }
2497 }
2498 if (in->buf->rawconsumed < unused)
2499 return(-1);
2500 return(in->buf->rawconsumed - unused);
2501 }
2502 return(in->consumed + (in->cur - in->base));
2503}
2504
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002505#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002506#ifdef LIBXML_ISO8859X_ENABLED
2507
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every converted character produces exactly one
 * output byte; conversion stops early, without error, once @out is full.
 *
 * Returns the number of bytes written to @out on success, -2 if the
 * input is malformed or holds a character that has no entry in
 * @xlattable, -3 if the input ends in the middle of a multi-byte
 * sequence, or -1 if @out, @outlen, @inlen or @xlattable is NULL.
 * A NULL @in is treated as an initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed,
 * counting only characters that were completely converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* negative lengths are handled as empty buffers */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + ((*inlen > 0) ? *inlen : 0);

    /*
     * Each iteration emits one byte, so checking for room once per
     * character is enough to keep every store inside @out.
     */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* plain ASCII is copied unchanged */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            /* the lead byte picks a 64-entry block, the trail the slot */
            d = xlattable [48 + (c & 0x3F) + xlattable [d & 0x1F] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            /* lead -> block indexed by c1 -> block indexed by c2 */
            d = xlattable [48 + (c2 & 0x3F) +
                    xlattable [48 + (c1 & 0x3F) +
                        xlattable [32 + (d & 0x0F)] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        *out++ = d;
        /* only now is the whole character accounted as consumed */
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return((ret < 0) ? ret : *outlen);
}
2626
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: the code point of each byte 0x80..0xFF, indexed by
 *                (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * may not have room for the next character.
 *
 * Returns the number of bytes written to @out on success, or -1 if an
 * argument is NULL or the input holds a byte that is undefined in
 * @unicodetable.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    /* negative lengths are handled as empty buffers */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + ((*inlen > 0) ? *inlen : 0);
    instop = inend;

    /*
     * Keep more than two bytes of room so the longest (3-byte) encoding
     * always fits. The room is measured as a pointer difference so no
     * pointer before the start of @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* bound the ASCII run by the room left in @out */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* test the bound first so the byte at @instop is never read */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * The loop above leaves at most two free bytes when it stops for
     * lack of room; they can still take ASCII characters.
     */
    while ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (*outlen);
}
2692
Daniel Veillard1cc912e2010-11-03 19:26:35 +01002693
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002694/************************************************************************
2695 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2696 ************************************************************************/
2697
/*
 * ISO-8859-2 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; a 0x0000 entry would mark an
 * undefined byte.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2716
/*
 * UTF-8 to ISO-8859-2, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2746
/*
 * ISO-8859-3 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; 0x0000 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2765
/*
 * UTF-8 to ISO-8859-3, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2799
/*
 * ISO-8859-4 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; a 0x0000 entry would mark an
 * undefined byte.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2818
/*
 * UTF-8 to ISO-8859-4, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2848
/*
 * ISO-8859-5 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; a 0x0000 entry would mark an
 * undefined byte.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2867
/*
 * UTF-8 to ISO-8859-5, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2897
/*
 * ISO-8859-6 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; 0x0000 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
2916
/*
 * UTF-8 to ISO-8859-6, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2942
/*
 * ISO-8859-7 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; 0x0000 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
2961
/*
 * UTF-8 to ISO-8859-7, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2995
/*
 * ISO-8859-8 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF) as read by ISO8859xToUTF8; 0x0000 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3014
/*
 * UTF-8 to ISO-8859-8, in the two-level layout read by UTF8ToISO8859x:
 * bytes 0..31 give a block number for the low 5 bits of a 2-byte lead,
 * bytes 32..47 give a block number for the low 4 bits of a 3-byte lead,
 * and the 64-byte blocks start at offset 48, indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3048
/*
 * ISO-8859-9 to Unicode map, handed to ISO8859xToUTF8() by ISO8859_9ToUTF8().
 * The 128 entries appear to be indexed by (byte - 0x80): the first 32 are the
 * identity range 0x0080..0x009f and the rest are the code points for bytes
 * 0xa0..0xff.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3067
/*
 * Reverse (UTF-8 to ISO-8859-9) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_9(). Sized as 48 bytes followed by 5 groups of 64 bytes; the
 * way the groups are indexed is defined by UTF8ToISO8859x(), which is not
 * part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3093
/*
 * ISO-8859-10 to Unicode map, handed to ISO8859xToUTF8() by
 * ISO8859_10ToUTF8(). The 128 entries appear to be indexed by (byte - 0x80):
 * the first 32 are the identity range 0x0080..0x009f and the rest are the
 * code points for bytes 0xa0..0xff.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3112
/*
 * Reverse (UTF-8 to ISO-8859-10) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_10(). Sized as 48 bytes followed by 7 groups of 64 bytes; the
 * way the groups are indexed is defined by UTF8ToISO8859x(), which is not
 * part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3146
/*
 * ISO-8859-11 to Unicode map, handed to ISO8859xToUTF8() by
 * ISO8859_11ToUTF8(). The 128 entries appear to be indexed by (byte - 0x80).
 * Several entries are 0x0000; presumably these mark bytes with no assigned
 * character -- confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3165
/*
 * Reverse (UTF-8 to ISO-8859-11) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_11(). Sized as 48 bytes followed by 6 groups of 64 bytes; the
 * way the groups are indexed is defined by UTF8ToISO8859x(), which is not
 * part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3195
/*
 * ISO-8859-13 to Unicode map, handed to ISO8859xToUTF8() by
 * ISO8859_13ToUTF8(). The 128 entries appear to be indexed by (byte - 0x80):
 * the first 32 are the identity range 0x0080..0x009f and the rest are the
 * code points for bytes 0xa0..0xff.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3214
/*
 * Reverse (UTF-8 to ISO-8859-13) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_13(). Sized as 48 bytes followed by 7 groups of 64 bytes; the
 * way the groups are indexed is defined by UTF8ToISO8859x(), which is not
 * part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3248
/*
 * ISO-8859-14 to Unicode map, handed to ISO8859xToUTF8() by
 * ISO8859_14ToUTF8(). The 128 entries appear to be indexed by (byte - 0x80):
 * the first 32 are the identity range 0x0080..0x009f and the rest are the
 * code points for bytes 0xa0..0xff.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3267
/*
 * Reverse (UTF-8 to ISO-8859-14) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_14(). Sized as 48 bytes followed by 10 groups of 64 bytes;
 * the way the groups are indexed is defined by UTF8ToISO8859x(), which is
 * not part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3313
/*
 * ISO-8859-15 to Unicode map, handed to ISO8859xToUTF8() by
 * ISO8859_15ToUTF8(). The 128 entries appear to be indexed by (byte - 0x80):
 * the first 32 are the identity range 0x0080..0x009f and the rest are the
 * code points for bytes 0xa0..0xff.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3332
/*
 * Reverse (UTF-8 to ISO-8859-15) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_15(). Sized as 48 bytes followed by 6 groups of 64 bytes; the
 * way the groups are indexed is defined by UTF8ToISO8859x(), which is not
 * part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3362
/*
 * ISO-8859-16 to Unicode map, handed to ISO8859xToUTF8() by
 * ISO8859_16ToUTF8(). The 128 entries appear to be indexed by (byte - 0x80):
 * the first 32 are the identity range 0x0080..0x009f and the rest are the
 * code points for bytes 0xa0..0xff.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3381
/*
 * Reverse (UTF-8 to ISO-8859-16) lookup data, handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_16(). Sized as 48 bytes followed by 9 groups of 64 bytes; the
 * way the groups are indexed is defined by UTF8ToISO8859x(), which is not
 * part of this section.
 * The string literal supplies exactly as many bytes as the array holds, so
 * no terminating NUL is stored (valid C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3423
3424
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 */
3428
3429static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3430 const unsigned char* in, int *inlen) {
3431 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3432}
3433static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3434 const unsigned char* in, int *inlen) {
3435 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3436}
3437
3438static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3439 const unsigned char* in, int *inlen) {
3440 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3441}
3442static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3443 const unsigned char* in, int *inlen) {
3444 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3445}
3446
3447static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3448 const unsigned char* in, int *inlen) {
3449 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3450}
3451static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3452 const unsigned char* in, int *inlen) {
3453 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3454}
3455
3456static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3457 const unsigned char* in, int *inlen) {
3458 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3459}
3460static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3461 const unsigned char* in, int *inlen) {
3462 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3463}
3464
3465static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3466 const unsigned char* in, int *inlen) {
3467 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3468}
3469static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3470 const unsigned char* in, int *inlen) {
3471 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3472}
3473
3474static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3475 const unsigned char* in, int *inlen) {
3476 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3477}
3478static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3479 const unsigned char* in, int *inlen) {
3480 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3481}
3482
3483static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3484 const unsigned char* in, int *inlen) {
3485 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3486}
3487static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3488 const unsigned char* in, int *inlen) {
3489 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3490}
3491
3492static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3493 const unsigned char* in, int *inlen) {
3494 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3495}
3496static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3497 const unsigned char* in, int *inlen) {
3498 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3499}
3500
3501static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3502 const unsigned char* in, int *inlen) {
3503 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3504}
3505static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3506 const unsigned char* in, int *inlen) {
3507 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3508}
3509
3510static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3511 const unsigned char* in, int *inlen) {
3512 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3513}
3514static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3515 const unsigned char* in, int *inlen) {
3516 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3517}
3518
3519static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3520 const unsigned char* in, int *inlen) {
3521 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3522}
3523static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3524 const unsigned char* in, int *inlen) {
3525 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3526}
3527
3528static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3529 const unsigned char* in, int *inlen) {
3530 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3531}
3532static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3533 const unsigned char* in, int *inlen) {
3534 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3535}
3536
3537static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3538 const unsigned char* in, int *inlen) {
3539 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3540}
3541static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3542 const unsigned char* in, int *inlen) {
3543 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3544}
3545
3546static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3547 const unsigned char* in, int *inlen) {
3548 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3549}
3550static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3551 const unsigned char* in, int *inlen) {
3552 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3553}
3554
3555static void
3556xmlRegisterCharEncodingHandlersISO8859x (void) {
3557 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3558 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3559 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3560 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3561 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3562 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3563 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3564 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3565 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3566 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3567 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3568 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3569 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3570 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3571}
3572
3573#endif
3574#endif
3575
Daniel Veillard5d4644e2005-04-01 13:11:58 +00003576#define bottom_encoding
3577#include "elfgcchack.h"