blob: 6c49fff1d3f6f41a461b0f00e67010bc8ad8d139 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
50typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52struct _xmlCharEncodingAlias {
53 const char *name;
54 const char *alias;
55};
56
57static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58static int xmlCharEncodingAliasesNb = 0;
59static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
71static int xmlLittleEndian = 1;
72
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000073/**
74 * xmlEncodingErrMemory:
75 * @extra: extra informations
76 *
77 * Handle an out of memory condition
78 */
79static void
80xmlEncodingErrMemory(const char *extra)
81{
82 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
83}
84
85/**
86 * xmlErrEncoding:
87 * @error: the error number
88 * @msg: the error message
89 *
90 * n encoding error
91 */
92static void
93xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
94{
95 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96 XML_FROM_I18N, error, XML_ERR_FATAL,
97 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
98}
Daniel Veillard97ac1312001-05-30 19:14:17 +000099
100/************************************************************************
101 * *
102 * Conversions To/From UTF8 encoding *
103 * *
104 ************************************************************************/
105
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  Every ASCII char maps to exactly one UTF-8 byte,
 * so the conversion uses all of @out and stops only when either buffer
 * is exhausted.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL or if a byte outside the ASCII range is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are updated on the non-ASCII failure path too, and then describe
 * the data converted before the offending byte.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* outend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    inend = in + (*inlen);
    outend = out + (*outlen);
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted so far and fail */
            *outlen = out - outstart;
            *inlen = in - instart;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
150
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000151#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.  When @in is NULL the call is an initialization
 * request: nothing is written and both lengths are set to 0.
 *
 * A multi-byte sequence cut by the end of @in is left unconsumed so the
 * caller can retry once more input is available.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On a -2 return both describe the data converted before the failure.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto bad_input;   /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto bad_input;                 /* no chance for this in Ascii */

        /* sequence split across buffers: wait for the remaining bytes */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte here means malformed input; it
             * must not be swallowed into a partially decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UTF-4 value */
        if (c >= 0x80)
            goto bad_input;                  /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

bad_input:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000235#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000236
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied as is, the others
 * are expanded to a two byte UTF-8 sequence.  Conversion stops at the
 * first char which does not fit in the remaining output space.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* runend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * The free space is measured as (outend - out) rather than compared
     * against (outend - 1), which would point before the buffer when
     * *outlen is 0.
     */
    while ((in < inend) && (outend - out >= 2)) {
        if (*in >= 0x80) {
            *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
        /* fast path: copy a run of ASCII bounded by the room left */
        runend = inend;
        if (runend - in > outend - out)
            runend = in + (outend - out);
        while ((in < runend) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* a single free byte can still take one ASCII char */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
285
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in both
 * buffers are copied verbatim from @inb to @out, without inspecting
 * their content.
 *
 * Returns the number of bytes written, or -1 if one of the arguments is
 *     NULL or if the smaller of the two lengths is negative.
 * The values of *outlen and *inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (!out || !inb || !outlen || !inlenb)
        return(-1);

    /* copy no more than the smaller of the two buffers */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
321
Daniel Veillarde72c7562002-05-31 09:47:30 +0000322
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000323#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.  When @in is NULL the call is an initialization
 * request: nothing is written and both lengths are set to 0.
 *
 * A multi-byte sequence cut by the end of @in is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead or continuation byte, or a character above 0xFF),
 *     or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On a -2 return both describe the data converted before the failure.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* const srcstart = in;
    const unsigned char* const dststart = out;
    const unsigned char* done = in;    /* end of the last full char converted */
    const unsigned char* srcend;
    const unsigned char* dstend;
    unsigned int value, byte;
    int more;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcend = in + (*inlen);
    dstend = out + (*outlen);
    while (in < srcend) {
        byte = *in++;
        if (byte < 0x80) {
            value = byte;
            more = 0;
        } else if (byte < 0xC0) {
            goto failed;        /* trailing byte in leading position */
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            more = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            more = 2;
        } else if (byte < 0xF8) {
            value = byte & 0x07;
            more = 3;
        } else {
            goto failed;        /* no chance for this in IsoLat1 */
        }

        /* not enough input left for the whole sequence */
        if (srcend - in < more)
            break;

        while (more > 0) {
            byte = *in++;
            if ((byte & 0xC0) != 0x80)
                goto failed;
            value = (value << 6) | (byte & 0x3F);
            more--;
        }

        /* assertion: value is a single UTF-4 value */
        if (value > 0xFF)
            goto failed;        /* no chance for this in IsoLat1 */
        if (out >= dstend)
            break;
        *out++ = value;
        done = in;
    }
    *outlen = out - dststart;
    *inlen = done - srcstart;
    return(*outlen);

failed:
    *outlen = out - dststart;
    *inlen = done - srcstart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000413#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000414
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is read one byte at a time, so
 * the result depends neither on the byte order of the host nor on the
 * alignment of @inb.  A trailing odd byte is ignored.
 *
 * A character is written only if its whole UTF-8 encoding fits in the
 * remaining output space.  A high surrogate ending the input is left
 * unconsumed until its low surrogate is available.
 *
 * Returns the number of bytes written, or -1 if one of the arguments is
 *     NULL, or -2 if the transcoding fails (a high surrogate is not
 *     followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 * On a -2 return both describe the data converted before the failure.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int inbytes, needed, bits;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    inbytes = *inlenb;
    if ((inbytes % 2) == 1)
        inbytes--;
    inend = inb + inbytes;
    outend = out + *outlen;

    while (inend - in >= 2) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2)          /* pair split across buffers */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x80) needed = 1;
        else if (c < 0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else needed = 4;

        /* never emit a truncated sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:  *out++ = c; break;
            case 2:  *out++ = ((c >>  6) & 0x1F) | 0xC0; break;
            case 3:  *out++ = ((c >> 12) & 0x0F) | 0xE0; break;
            default: *out++ = ((c >> 18) & 0x07) | 0xF0; break;
        }
        for (bits = (needed - 2) * 6; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
502
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000503#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  The output is written one byte at a time, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @outb.  When @in is NULL the call is an initialization
 * request: nothing is written and both lengths are set to 0.
 *
 * A multi-byte sequence cut by the end of @in is left unconsumed, and a
 * character is written only if all its code units fit in @outb.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 * On a -2 return both describe the data converted before the failure.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto bad_input;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto bad_input;                /* no chance for this in UTF-16 */

        /* sequence split across buffers: wait for the remaining bytes */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte must not be folded into c */
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c & 0xFF;
            *out++ = (c >> 8) & 0xFF;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = hi & 0xFF;
            *out++ = (hi >> 8) & 0xFF;
            *out++ = lo & 0xFF;
            *out++ = (lo >> 8) & 0xFF;
        } else {
            /* beyond the UTF-16 range: retrying could never succeed */
            goto bad_input;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

bad_input:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
612
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.  When @in is NULL the call is an initialization
 * request: the bytes 0xFF 0xFE (little endian Byte Order Mark) are
 * written if @outb has room for them, otherwise nothing is written.
 * Any other input is handed over to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or the result of UTF8ToUTF16LE() for a non NULL @in.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* the initialization path below dereferences all three pointers */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
William M. Brack030a7a12004-02-10 12:48:57 +0000651#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000652
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is read one byte at a time, so
 * the result depends neither on the byte order of the host nor on the
 * alignment of @inb.  A trailing odd byte is ignored.
 *
 * A character is written only if its whole UTF-8 encoding fits in the
 * remaining output space, so @out never ends with a truncated sequence
 * whose input was reported as consumed.  A high surrogate ending the
 * input is left unconsumed until its low surrogate is available, like
 * in UTF16LEToUTF8().
 *
 * Returns the number of bytes written, or -1 if one of the arguments is
 *     NULL, or -2 if the transcoding fails (a high surrogate is not
 *     followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 * On a -2 return both describe the data converted before the failure.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int inbytes, needed, bits;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    inbytes = *inlenb;
    if ((inbytes % 2) == 1)
        inbytes--;
    inend = inb + inbytes;
    outend = out + *outlen;

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2)          /* pair split across buffers */
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x80) needed = 1;
        else if (c < 0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else needed = 4;

        /* never emit a truncated sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:  *out++ = c; break;
            case 2:  *out++ = ((c >>  6) & 0x1F) | 0xC0; break;
            case 3:  *out++ = ((c >> 12) & 0x0F) | 0xE0; break;
            default: *out++ = ((c >> 18) & 0x07) | 0xF0; break;
        }
        for (bits = (needed - 2) * 6; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
744
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000745#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  The output is written one byte at a time, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @outb.  When @in is NULL the call is an initialization
 * request: nothing is written and both lengths are set to 0.
 *
 * A multi-byte sequence cut by the end of @in is left unconsumed, and a
 * character is written only if all its code units fit in @outb.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * always counted in bytes, including on a -2 return.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto bad_input;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto bad_input;                /* no chance for this in UTF-16 */

        /* sequence split across buffers: wait for the remaining bytes */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte must not be folded into c */
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (c >> 8) & 0xFF;
            *out++ = c & 0xFF;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (hi >> 8) & 0xFF;
            *out++ = hi & 0xFF;
            *out++ = (lo >> 8) & 0xFF;
            *out++ = lo & 0xFF;
        } else {
            /* beyond the UTF-16 range: retrying could never succeed */
            goto bad_input;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

bad_input:
    /* byte count, like on the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000851#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000852
Daniel Veillard97ac1312001-05-30 19:14:17 +0000853/************************************************************************
854 * *
855 * Generic encoding handling routines *
856 * *
857 ************************************************************************/
858
Owen Taylor3473f882001-02-23 17:55:21 +0000859/**
860 * xmlDetectCharEncoding:
861 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000862 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000863 * @len: pointer to the length of the buffer
864 *
865 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000866 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000867 *
868 * Returns one of the XML_CHAR_ENCODING_... values.
869 */
870xmlCharEncoding
871xmlDetectCharEncoding(const unsigned char* in, int len)
872{
Daniel Veillardce682bc2004-11-05 17:22:25 +0000873 if (in == NULL)
874 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000875 if (len >= 4) {
876 if ((in[0] == 0x00) && (in[1] == 0x00) &&
877 (in[2] == 0x00) && (in[3] == 0x3C))
878 return(XML_CHAR_ENCODING_UCS4BE);
879 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
880 (in[2] == 0x00) && (in[3] == 0x00))
881 return(XML_CHAR_ENCODING_UCS4LE);
882 if ((in[0] == 0x00) && (in[1] == 0x00) &&
883 (in[2] == 0x3C) && (in[3] == 0x00))
884 return(XML_CHAR_ENCODING_UCS4_2143);
885 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
886 (in[2] == 0x00) && (in[3] == 0x00))
887 return(XML_CHAR_ENCODING_UCS4_3412);
888 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
889 (in[2] == 0xA7) && (in[3] == 0x94))
890 return(XML_CHAR_ENCODING_EBCDIC);
891 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
892 (in[2] == 0x78) && (in[3] == 0x6D))
893 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000894 /*
895 * Although not part of the recommendation, we also
896 * attempt an "auto-recognition" of UTF-16LE and
897 * UTF-16BE encodings.
898 */
899 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
900 (in[2] == 0x3F) && (in[3] == 0x00))
901 return(XML_CHAR_ENCODING_UTF16LE);
902 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
903 (in[2] == 0x00) && (in[3] == 0x3F))
904 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000905 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000906 if (len >= 3) {
907 /*
908 * Errata on XML-1.0 June 20 2001
909 * We now allow an UTF8 encoded BOM
910 */
911 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
912 (in[2] == 0xBF))
913 return(XML_CHAR_ENCODING_UTF8);
914 }
William M. Brackf9415e42003-11-28 09:39:10 +0000915 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000916 if (len >= 2) {
917 if ((in[0] == 0xFE) && (in[1] == 0xFF))
918 return(XML_CHAR_ENCODING_UTF16BE);
919 if ((in[0] == 0xFF) && (in[1] == 0xFE))
920 return(XML_CHAR_ENCODING_UTF16LE);
921 }
922 return(XML_CHAR_ENCODING_NONE);
923}
924
925/**
926 * xmlCleanupEncodingAliases:
927 *
928 * Unregisters all aliases
929 */
930void
931xmlCleanupEncodingAliases(void) {
932 int i;
933
934 if (xmlCharEncodingAliases == NULL)
935 return;
936
937 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
938 if (xmlCharEncodingAliases[i].name != NULL)
939 xmlFree((char *) xmlCharEncodingAliases[i].name);
940 if (xmlCharEncodingAliases[i].alias != NULL)
941 xmlFree((char *) xmlCharEncodingAliases[i].alias);
942 }
943 xmlCharEncodingAliasesNb = 0;
944 xmlCharEncodingAliasesMax = 0;
945 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000946 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000947}
948
949/**
950 * xmlGetEncodingAlias:
951 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
952 *
953 * Lookup an encoding name for the given alias.
954 *
William M. Brackf9415e42003-11-28 09:39:10 +0000955 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000956 */
957const char *
958xmlGetEncodingAlias(const char *alias) {
959 int i;
960 char upper[100];
961
962 if (alias == NULL)
963 return(NULL);
964
965 if (xmlCharEncodingAliases == NULL)
966 return(NULL);
967
968 for (i = 0;i < 99;i++) {
969 upper[i] = toupper(alias[i]);
970 if (upper[i] == 0) break;
971 }
972 upper[i] = 0;
973
974 /*
975 * Walk down the list looking for a definition of the alias
976 */
977 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
978 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
979 return(xmlCharEncodingAliases[i].name);
980 }
981 }
982 return(NULL);
983}
984
985/**
986 * xmlAddEncodingAlias:
987 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
988 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
989 *
William M. Brackf9415e42003-11-28 09:39:10 +0000990 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000991 * will be overwritten.
992 *
993 * Returns 0 in case of success, -1 in case of error
994 */
995int
996xmlAddEncodingAlias(const char *name, const char *alias) {
997 int i;
998 char upper[100];
999
1000 if ((name == NULL) || (alias == NULL))
1001 return(-1);
1002
1003 for (i = 0;i < 99;i++) {
1004 upper[i] = toupper(alias[i]);
1005 if (upper[i] == 0) break;
1006 }
1007 upper[i] = 0;
1008
1009 if (xmlCharEncodingAliases == NULL) {
1010 xmlCharEncodingAliasesNb = 0;
1011 xmlCharEncodingAliasesMax = 20;
1012 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1013 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1014 if (xmlCharEncodingAliases == NULL)
1015 return(-1);
1016 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1017 xmlCharEncodingAliasesMax *= 2;
1018 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1019 xmlRealloc(xmlCharEncodingAliases,
1020 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1021 }
1022 /*
1023 * Walk down the list looking for a definition of the alias
1024 */
1025 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027 /*
1028 * Replace the definition.
1029 */
1030 xmlFree((char *) xmlCharEncodingAliases[i].name);
1031 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1032 return(0);
1033 }
1034 }
1035 /*
1036 * Add the definition
1037 */
1038 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1039 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1040 xmlCharEncodingAliasesNb++;
1041 return(0);
1042}
1043
1044/**
1045 * xmlDelEncodingAlias:
1046 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1047 *
1048 * Unregisters an encoding alias @alias
1049 *
1050 * Returns 0 in case of success, -1 in case of error
1051 */
1052int
1053xmlDelEncodingAlias(const char *alias) {
1054 int i;
1055
1056 if (alias == NULL)
1057 return(-1);
1058
1059 if (xmlCharEncodingAliases == NULL)
1060 return(-1);
1061 /*
1062 * Walk down the list looking for a definition of the alias
1063 */
1064 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1065 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1066 xmlFree((char *) xmlCharEncodingAliases[i].name);
1067 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1068 xmlCharEncodingAliasesNb--;
1069 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1070 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1071 return(0);
1072 }
1073 }
1074 return(-1);
1075}
1076
1077/**
1078 * xmlParseCharEncoding:
1079 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1080 *
William M. Brackf9415e42003-11-28 09:39:10 +00001081 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001082 * that the comparison is case insensitive accordingly to the section
1083 * [XML] 4.3.3 Character Encoding in Entities.
1084 *
1085 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1086 * if not recognized.
1087 */
1088xmlCharEncoding
1089xmlParseCharEncoding(const char* name)
1090{
1091 const char *alias;
1092 char upper[500];
1093 int i;
1094
1095 if (name == NULL)
1096 return(XML_CHAR_ENCODING_NONE);
1097
1098 /*
1099 * Do the alias resolution
1100 */
1101 alias = xmlGetEncodingAlias(name);
1102 if (alias != NULL)
1103 name = alias;
1104
1105 for (i = 0;i < 499;i++) {
1106 upper[i] = toupper(name[i]);
1107 if (upper[i] == 0) break;
1108 }
1109 upper[i] = 0;
1110
1111 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1112 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1113 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1114
1115 /*
1116 * NOTE: if we were able to parse this, the endianness of UTF16 is
1117 * already found and in use
1118 */
1119 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1120 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1121
1122 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1123 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1124 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1125
1126 /*
1127 * NOTE: if we were able to parse this, the endianness of UCS4 is
1128 * already found and in use
1129 */
1130 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1131 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1132 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1133
1134
1135 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1136 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1137 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1138
1139 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1140 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1141 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1142
1143 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1144 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1145 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1146 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1147 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1148 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1149 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1150
1151 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1152 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1153 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1154
1155#ifdef DEBUG_ENCODING
1156 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1157#endif
1158 return(XML_CHAR_ENCODING_ERROR);
1159}
1160
1161/**
1162 * xmlGetCharEncodingName:
1163 * @enc: the encoding
1164 *
1165 * The "canonical" name for XML encoding.
1166 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1167 * Section 4.3.3 Character Encoding in Entities
1168 *
1169 * Returns the canonical name for the given encoding
1170 */
1171
1172const char*
1173xmlGetCharEncodingName(xmlCharEncoding enc) {
1174 switch (enc) {
1175 case XML_CHAR_ENCODING_ERROR:
1176 return(NULL);
1177 case XML_CHAR_ENCODING_NONE:
1178 return(NULL);
1179 case XML_CHAR_ENCODING_UTF8:
1180 return("UTF-8");
1181 case XML_CHAR_ENCODING_UTF16LE:
1182 return("UTF-16");
1183 case XML_CHAR_ENCODING_UTF16BE:
1184 return("UTF-16");
1185 case XML_CHAR_ENCODING_EBCDIC:
1186 return("EBCDIC");
1187 case XML_CHAR_ENCODING_UCS4LE:
1188 return("ISO-10646-UCS-4");
1189 case XML_CHAR_ENCODING_UCS4BE:
1190 return("ISO-10646-UCS-4");
1191 case XML_CHAR_ENCODING_UCS4_2143:
1192 return("ISO-10646-UCS-4");
1193 case XML_CHAR_ENCODING_UCS4_3412:
1194 return("ISO-10646-UCS-4");
1195 case XML_CHAR_ENCODING_UCS2:
1196 return("ISO-10646-UCS-2");
1197 case XML_CHAR_ENCODING_8859_1:
1198 return("ISO-8859-1");
1199 case XML_CHAR_ENCODING_8859_2:
1200 return("ISO-8859-2");
1201 case XML_CHAR_ENCODING_8859_3:
1202 return("ISO-8859-3");
1203 case XML_CHAR_ENCODING_8859_4:
1204 return("ISO-8859-4");
1205 case XML_CHAR_ENCODING_8859_5:
1206 return("ISO-8859-5");
1207 case XML_CHAR_ENCODING_8859_6:
1208 return("ISO-8859-6");
1209 case XML_CHAR_ENCODING_8859_7:
1210 return("ISO-8859-7");
1211 case XML_CHAR_ENCODING_8859_8:
1212 return("ISO-8859-8");
1213 case XML_CHAR_ENCODING_8859_9:
1214 return("ISO-8859-9");
1215 case XML_CHAR_ENCODING_2022_JP:
1216 return("ISO-2022-JP");
1217 case XML_CHAR_ENCODING_SHIFT_JIS:
1218 return("Shift-JIS");
1219 case XML_CHAR_ENCODING_EUC_JP:
1220 return("EUC-JP");
1221 case XML_CHAR_ENCODING_ASCII:
1222 return(NULL);
1223 }
1224 return(NULL);
1225}
1226
Daniel Veillard97ac1312001-05-30 19:14:17 +00001227/************************************************************************
1228 * *
1229 * Char encoding handlers *
1230 * *
1231 ************************************************************************/
1232
Owen Taylor3473f882001-02-23 17:55:21 +00001233
1234/* the size should be growable, but it's not a big deal ... */
1235#define MAX_ENCODING_HANDLERS 50
1236static xmlCharEncodingHandlerPtr *handlers = NULL;
1237static int nbCharEncodingHandler = 0;
1238
1239/*
1240 * The default is UTF-8 for XML, that's also the default used for the
1241 * parser internals, so the default encoding handler is NULL
1242 */
1243
1244static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1245
1246/**
1247 * xmlNewCharEncodingHandler:
1248 * @name: the encoding name, in UTF-8 format (ASCII actually)
1249 * @input: the xmlCharEncodingInputFunc to read that encoding
1250 * @output: the xmlCharEncodingOutputFunc to write that encoding
1251 *
1252 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001253 *
Owen Taylor3473f882001-02-23 17:55:21 +00001254 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1255 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001256xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001257xmlNewCharEncodingHandler(const char *name,
1258 xmlCharEncodingInputFunc input,
1259 xmlCharEncodingOutputFunc output) {
1260 xmlCharEncodingHandlerPtr handler;
1261 const char *alias;
1262 char upper[500];
1263 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001264 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001265
1266 /*
1267 * Do the alias resolution
1268 */
1269 alias = xmlGetEncodingAlias(name);
1270 if (alias != NULL)
1271 name = alias;
1272
1273 /*
1274 * Keep only the uppercase version of the encoding.
1275 */
1276 if (name == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001277 xmlEncodingErr(XML_I18N_NO_NAME,
1278 "xmlNewCharEncodingHandler : no name !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001279 return(NULL);
1280 }
1281 for (i = 0;i < 499;i++) {
1282 upper[i] = toupper(name[i]);
1283 if (upper[i] == 0) break;
1284 }
1285 upper[i] = 0;
1286 up = xmlMemStrdup(upper);
1287 if (up == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001288 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001289 return(NULL);
1290 }
1291
1292 /*
1293 * allocate and fill-up an handler block.
1294 */
1295 handler = (xmlCharEncodingHandlerPtr)
1296 xmlMalloc(sizeof(xmlCharEncodingHandler));
1297 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001298 xmlFree(up);
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001299 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001300 return(NULL);
1301 }
1302 handler->input = input;
1303 handler->output = output;
1304 handler->name = up;
1305
1306#ifdef LIBXML_ICONV_ENABLED
1307 handler->iconv_in = NULL;
1308 handler->iconv_out = NULL;
1309#endif /* LIBXML_ICONV_ENABLED */
1310
1311 /*
1312 * registers and returns the handler.
1313 */
1314 xmlRegisterCharEncodingHandler(handler);
1315#ifdef DEBUG_ENCODING
1316 xmlGenericError(xmlGenericErrorContext,
1317 "Registered encoding handler for %s\n", name);
1318#endif
1319 return(handler);
1320}
1321
1322/**
1323 * xmlInitCharEncodingHandlers:
1324 *
1325 * Initialize the char encoding support, it registers the default
1326 * encoding supported.
1327 * NOTE: while public, this function usually doesn't need to be called
1328 * in normal processing.
1329 */
1330void
1331xmlInitCharEncodingHandlers(void) {
1332 unsigned short int tst = 0x1234;
1333 unsigned char *ptr = (unsigned char *) &tst;
1334
1335 if (handlers != NULL) return;
1336
1337 handlers = (xmlCharEncodingHandlerPtr *)
1338 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1339
1340 if (*ptr == 0x12) xmlLittleEndian = 0;
1341 else if (*ptr == 0x34) xmlLittleEndian = 1;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001342 else {
1343 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1344 "Odd problem at endianness detection\n", NULL);
1345 }
Owen Taylor3473f882001-02-23 17:55:21 +00001346
1347 if (handlers == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001348 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001349 return;
1350 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001351 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001352#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001353 xmlUTF16LEHandler =
1354 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1355 xmlUTF16BEHandler =
1356 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001357 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001358 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1359 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001360 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001361#ifdef LIBXML_HTML_ENABLED
1362 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1363#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001364#else
1365 xmlUTF16LEHandler =
1366 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1367 xmlUTF16BEHandler =
1368 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001369 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001370 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1371 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1372 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1373#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001374#ifndef LIBXML_ICONV_ENABLED
1375#ifdef LIBXML_ISO8859X_ENABLED
1376 xmlRegisterCharEncodingHandlersISO8859x ();
1377#endif
1378#endif
1379
Owen Taylor3473f882001-02-23 17:55:21 +00001380}
1381
1382/**
1383 * xmlCleanupCharEncodingHandlers:
1384 *
1385 * Cleanup the memory allocated for the char encoding support, it
1386 * unregisters all the encoding handlers and the aliases.
1387 */
1388void
1389xmlCleanupCharEncodingHandlers(void) {
1390 xmlCleanupEncodingAliases();
1391
1392 if (handlers == NULL) return;
1393
1394 for (;nbCharEncodingHandler > 0;) {
1395 nbCharEncodingHandler--;
1396 if (handlers[nbCharEncodingHandler] != NULL) {
1397 if (handlers[nbCharEncodingHandler]->name != NULL)
1398 xmlFree(handlers[nbCharEncodingHandler]->name);
1399 xmlFree(handlers[nbCharEncodingHandler]);
1400 }
1401 }
1402 xmlFree(handlers);
1403 handlers = NULL;
1404 nbCharEncodingHandler = 0;
1405 xmlDefaultCharEncodingHandler = NULL;
1406}
1407
1408/**
1409 * xmlRegisterCharEncodingHandler:
1410 * @handler: the xmlCharEncodingHandlerPtr handler block
1411 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001412 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001413 */
1414void
1415xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1416 if (handlers == NULL) xmlInitCharEncodingHandlers();
1417 if (handler == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001418 xmlEncodingErr(XML_I18N_NO_HANDLER,
1419 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001420 return;
1421 }
1422
1423 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001424 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1425 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1426 "MAX_ENCODING_HANDLERS");
Owen Taylor3473f882001-02-23 17:55:21 +00001427 return;
1428 }
1429 handlers[nbCharEncodingHandler++] = handler;
1430}
1431
1432/**
1433 * xmlGetCharEncodingHandler:
1434 * @enc: an xmlCharEncoding value.
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 *
1438 * Returns the handler or NULL if not found
1439 */
1440xmlCharEncodingHandlerPtr
1441xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1442 xmlCharEncodingHandlerPtr handler;
1443
1444 if (handlers == NULL) xmlInitCharEncodingHandlers();
1445 switch (enc) {
1446 case XML_CHAR_ENCODING_ERROR:
1447 return(NULL);
1448 case XML_CHAR_ENCODING_NONE:
1449 return(NULL);
1450 case XML_CHAR_ENCODING_UTF8:
1451 return(NULL);
1452 case XML_CHAR_ENCODING_UTF16LE:
1453 return(xmlUTF16LEHandler);
1454 case XML_CHAR_ENCODING_UTF16BE:
1455 return(xmlUTF16BEHandler);
1456 case XML_CHAR_ENCODING_EBCDIC:
1457 handler = xmlFindCharEncodingHandler("EBCDIC");
1458 if (handler != NULL) return(handler);
1459 handler = xmlFindCharEncodingHandler("ebcdic");
1460 if (handler != NULL) return(handler);
1461 break;
1462 case XML_CHAR_ENCODING_UCS4BE:
1463 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1464 if (handler != NULL) return(handler);
1465 handler = xmlFindCharEncodingHandler("UCS-4");
1466 if (handler != NULL) return(handler);
1467 handler = xmlFindCharEncodingHandler("UCS4");
1468 if (handler != NULL) return(handler);
1469 break;
1470 case XML_CHAR_ENCODING_UCS4LE:
1471 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1472 if (handler != NULL) return(handler);
1473 handler = xmlFindCharEncodingHandler("UCS-4");
1474 if (handler != NULL) return(handler);
1475 handler = xmlFindCharEncodingHandler("UCS4");
1476 if (handler != NULL) return(handler);
1477 break;
1478 case XML_CHAR_ENCODING_UCS4_2143:
1479 break;
1480 case XML_CHAR_ENCODING_UCS4_3412:
1481 break;
1482 case XML_CHAR_ENCODING_UCS2:
1483 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1484 if (handler != NULL) return(handler);
1485 handler = xmlFindCharEncodingHandler("UCS-2");
1486 if (handler != NULL) return(handler);
1487 handler = xmlFindCharEncodingHandler("UCS2");
1488 if (handler != NULL) return(handler);
1489 break;
1490
1491 /*
1492 * We used to keep ISO Latin encodings native in the
1493 * generated data. This led to so many problems that
1494 * this has been removed. One can still change this
1495 * back by registering no-ops encoders for those
1496 */
1497 case XML_CHAR_ENCODING_8859_1:
1498 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1499 if (handler != NULL) return(handler);
1500 break;
1501 case XML_CHAR_ENCODING_8859_2:
1502 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1503 if (handler != NULL) return(handler);
1504 break;
1505 case XML_CHAR_ENCODING_8859_3:
1506 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1507 if (handler != NULL) return(handler);
1508 break;
1509 case XML_CHAR_ENCODING_8859_4:
1510 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1511 if (handler != NULL) return(handler);
1512 break;
1513 case XML_CHAR_ENCODING_8859_5:
1514 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1515 if (handler != NULL) return(handler);
1516 break;
1517 case XML_CHAR_ENCODING_8859_6:
1518 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1519 if (handler != NULL) return(handler);
1520 break;
1521 case XML_CHAR_ENCODING_8859_7:
1522 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1523 if (handler != NULL) return(handler);
1524 break;
1525 case XML_CHAR_ENCODING_8859_8:
1526 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1527 if (handler != NULL) return(handler);
1528 break;
1529 case XML_CHAR_ENCODING_8859_9:
1530 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1531 if (handler != NULL) return(handler);
1532 break;
1533
1534
1535 case XML_CHAR_ENCODING_2022_JP:
1536 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1537 if (handler != NULL) return(handler);
1538 break;
1539 case XML_CHAR_ENCODING_SHIFT_JIS:
1540 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1541 if (handler != NULL) return(handler);
1542 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1543 if (handler != NULL) return(handler);
1544 handler = xmlFindCharEncodingHandler("Shift_JIS");
1545 if (handler != NULL) return(handler);
1546 break;
1547 case XML_CHAR_ENCODING_EUC_JP:
1548 handler = xmlFindCharEncodingHandler("EUC-JP");
1549 if (handler != NULL) return(handler);
1550 break;
1551 default:
1552 break;
1553 }
1554
1555#ifdef DEBUG_ENCODING
1556 xmlGenericError(xmlGenericErrorContext,
1557 "No handler found for encoding %d\n", enc);
1558#endif
1559 return(NULL);
1560}
1561
1562/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001563 * xmlFindCharEncodingHandler:
1564 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001565 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001566 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001567 *
1568 * Returns the handler or NULL if not found
1569 */
1570xmlCharEncodingHandlerPtr
1571xmlFindCharEncodingHandler(const char *name) {
1572 const char *nalias;
1573 const char *norig;
1574 xmlCharEncoding alias;
1575#ifdef LIBXML_ICONV_ENABLED
1576 xmlCharEncodingHandlerPtr enc;
1577 iconv_t icv_in, icv_out;
1578#endif /* LIBXML_ICONV_ENABLED */
1579 char upper[100];
1580 int i;
1581
1582 if (handlers == NULL) xmlInitCharEncodingHandlers();
1583 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1584 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1585
1586 /*
1587 * Do the alias resolution
1588 */
1589 norig = name;
1590 nalias = xmlGetEncodingAlias(name);
1591 if (nalias != NULL)
1592 name = nalias;
1593
1594 /*
1595 * Check first for directly registered encoding names
1596 */
1597 for (i = 0;i < 99;i++) {
1598 upper[i] = toupper(name[i]);
1599 if (upper[i] == 0) break;
1600 }
1601 upper[i] = 0;
1602
1603 for (i = 0;i < nbCharEncodingHandler; i++)
1604 if (!strcmp(upper, handlers[i]->name)) {
1605#ifdef DEBUG_ENCODING
1606 xmlGenericError(xmlGenericErrorContext,
1607 "Found registered handler for encoding %s\n", name);
1608#endif
1609 return(handlers[i]);
1610 }
1611
1612#ifdef LIBXML_ICONV_ENABLED
1613 /* check whether iconv can handle this */
1614 icv_in = iconv_open("UTF-8", name);
1615 icv_out = iconv_open(name, "UTF-8");
Daniel Veillard28aac0b2006-10-16 08:31:18 +00001616 if (icv_in == (iconv_t) -1) {
1617 icv_in = iconv_open("UTF-8", upper);
1618 }
1619 if (icv_out == (iconv_t) -1) {
1620 icv_out = iconv_open(upper, "UTF-8");
1621 }
Owen Taylor3473f882001-02-23 17:55:21 +00001622 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1623 enc = (xmlCharEncodingHandlerPtr)
1624 xmlMalloc(sizeof(xmlCharEncodingHandler));
1625 if (enc == NULL) {
1626 iconv_close(icv_in);
1627 iconv_close(icv_out);
1628 return(NULL);
1629 }
1630 enc->name = xmlMemStrdup(name);
1631 enc->input = NULL;
1632 enc->output = NULL;
1633 enc->iconv_in = icv_in;
1634 enc->iconv_out = icv_out;
1635#ifdef DEBUG_ENCODING
1636 xmlGenericError(xmlGenericErrorContext,
1637 "Found iconv handler for encoding %s\n", name);
1638#endif
1639 return enc;
1640 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001641 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00001642 "iconv : problems with filters for '%s'\n", name);
1643 }
1644#endif /* LIBXML_ICONV_ENABLED */
1645
1646#ifdef DEBUG_ENCODING
1647 xmlGenericError(xmlGenericErrorContext,
1648 "No handler found for encoding %s\n", name);
1649#endif
1650
1651 /*
1652 * Fallback using the canonical names
1653 */
1654 alias = xmlParseCharEncoding(norig);
1655 if (alias != XML_CHAR_ENCODING_ERROR) {
1656 const char* canon;
1657 canon = xmlGetCharEncodingName(alias);
1658 if ((canon != NULL) && (strcmp(name, canon))) {
1659 return(xmlFindCharEncodingHandler(canon));
1660 }
1661 }
1662
William M. Brackf9415e42003-11-28 09:39:10 +00001663 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001664 return(NULL);
1665}
1666
Daniel Veillard97ac1312001-05-30 19:14:17 +00001667/************************************************************************
1668 * *
1669 * ICONV based generic conversion functions *
1670 * *
1671 ************************************************************************/
1672
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Run one iconv() conversion step and map its outcome to the return
 * codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (errno E2BIG) or if an argument is NULL, or
 *     -2 if the transcoding fails (errno EILSEQ), or
 *     -3 for any other failure, including an incomplete trailing
 *        sequence (errno EINVAL).
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced
 * (0 when an argument was NULL).
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    int rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);

    /* iconv leaves the remaining byte counts in srcLeft / dstLeft */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and every other errno value are reported the same way */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1732
Daniel Veillard97ac1312001-05-30 19:14:17 +00001733/************************************************************************
1734 * *
1735 * The real API used by libxml for on-the-fly conversion *
1736 * *
1737 ************************************************************************/
1738
Owen Taylor3473f882001-02-23 17:55:21 +00001739/**
1740 * xmlCharEncFirstLine:
1741 * @handler: char enconding transformation data structure
1742 * @out: an xmlBuffer for the output.
1743 * @in: an xmlBuffer for the input
1744 *
1745 * Front-end for the encoding handler input function, but handle only
1746 * the very first line, i.e. limit itself to 45 chars.
1747 *
1748 * Returns the number of byte written if success, or
1749 * -1 general error
1750 * -2 if the transcoding fails (for *in is not valid utf8 string or
1751 * the result of transformation can't fit into the encoding we want), or
1752 */
1753int
1754xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1755 xmlBufferPtr in) {
1756 int ret = -2;
1757 int written;
1758 int toconv;
1759
1760 if (handler == NULL) return(-1);
1761 if (out == NULL) return(-1);
1762 if (in == NULL) return(-1);
1763
William M. Brack38d452a2007-05-22 16:00:06 +00001764 /* calculate space available */
Owen Taylor3473f882001-02-23 17:55:21 +00001765 written = out->size - out->use;
1766 toconv = in->use;
Owen Taylor3473f882001-02-23 17:55:21 +00001767 /*
1768 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1769 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001770 * declaration without going too far inside the document content.
Daniel Veillard57c9db02008-03-06 14:37:10 +00001771 * on UTF-16 this means 90bytes, on UCS4 this means 180
Owen Taylor3473f882001-02-23 17:55:21 +00001772 */
Daniel Veillard57c9db02008-03-06 14:37:10 +00001773 if (toconv > 180)
1774 toconv = 180;
William M. Brack38d452a2007-05-22 16:00:06 +00001775 if (toconv * 2 >= written) {
1776 xmlBufferGrow(out, toconv);
1777 written = out->size - out->use - 1;
1778 }
Owen Taylor3473f882001-02-23 17:55:21 +00001779
1780 if (handler->input != NULL) {
1781 ret = handler->input(&out->content[out->use], &written,
1782 in->content, &toconv);
1783 xmlBufferShrink(in, toconv);
1784 out->use += written;
1785 out->content[out->use] = 0;
1786 }
1787#ifdef LIBXML_ICONV_ENABLED
1788 else if (handler->iconv_in != NULL) {
1789 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1790 &written, in->content, &toconv);
1791 xmlBufferShrink(in, toconv);
1792 out->use += written;
1793 out->content[out->use] = 0;
1794 if (ret == -1) ret = -3;
1795 }
1796#endif /* LIBXML_ICONV_ENABLED */
1797#ifdef DEBUG_ENCODING
1798 switch (ret) {
1799 case 0:
1800 xmlGenericError(xmlGenericErrorContext,
1801 "converted %d bytes to %d bytes of input\n",
1802 toconv, written);
1803 break;
1804 case -1:
1805 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1806 toconv, written, in->use);
1807 break;
1808 case -2:
1809 xmlGenericError(xmlGenericErrorContext,
1810 "input conversion failed due to input error\n");
1811 break;
1812 case -3:
1813 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1814 toconv, written, in->use);
1815 break;
1816 default:
1817 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1818 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001819#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001820 /*
1821 * Ignore when input buffer is not on a boundary
1822 */
1823 if (ret == -3) ret = 0;
1824 if (ret == -1) ret = 0;
1825 return(ret);
1826}
1827
1828/**
1829 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001830 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001831 * @out: an xmlBuffer for the output.
1832 * @in: an xmlBuffer for the input
1833 *
1834 * Generic front-end for the encoding handler input function
1835 *
1836 * Returns the number of byte written if success, or
1837 * -1 general error
1838 * -2 if the transcoding fails (for *in is not valid utf8 string or
1839 * the result of transformation can't fit into the encoding we want), or
1840 */
1841int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001842xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1843 xmlBufferPtr in)
1844{
Owen Taylor3473f882001-02-23 17:55:21 +00001845 int ret = -2;
1846 int written;
1847 int toconv;
1848
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001849 if (handler == NULL)
1850 return (-1);
1851 if (out == NULL)
1852 return (-1);
1853 if (in == NULL)
1854 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001855
1856 toconv = in->use;
1857 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001858 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001859 written = out->size - out->use;
1860 if (toconv * 2 >= written) {
1861 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001862 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001863 }
1864 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001865 ret = handler->input(&out->content[out->use], &written,
1866 in->content, &toconv);
1867 xmlBufferShrink(in, toconv);
1868 out->use += written;
1869 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001870 }
1871#ifdef LIBXML_ICONV_ENABLED
1872 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001873 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1874 &written, in->content, &toconv);
1875 xmlBufferShrink(in, toconv);
1876 out->use += written;
1877 out->content[out->use] = 0;
1878 if (ret == -1)
1879 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 }
1881#endif /* LIBXML_ICONV_ENABLED */
1882 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001883 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001884#ifdef DEBUG_ENCODING
1885 xmlGenericError(xmlGenericErrorContext,
1886 "converted %d bytes to %d bytes of input\n",
1887 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001888#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001889 break;
1890 case -1:
1891#ifdef DEBUG_ENCODING
1892 xmlGenericError(xmlGenericErrorContext,
1893 "converted %d bytes to %d bytes of input, %d left\n",
1894 toconv, written, in->use);
1895#endif
1896 break;
1897 case -3:
1898#ifdef DEBUG_ENCODING
1899 xmlGenericError(xmlGenericErrorContext,
1900 "converted %d bytes to %d bytes of input, %d left\n",
1901 toconv, written, in->use);
1902#endif
1903 break;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001904 case -2: {
1905 char buf[50];
1906
Daniel Veillard2e7598c2005-09-02 12:28:34 +00001907 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001908 in->content[0], in->content[1],
1909 in->content[2], in->content[3]);
1910 buf[49] = 0;
1911 xmlEncodingErr(XML_I18N_CONV_FAILED,
1912 "input conversion failed due to input error, bytes %s\n",
1913 buf);
1914 }
Owen Taylor3473f882001-02-23 17:55:21 +00001915 }
1916 /*
1917 * Ignore when input buffer is not on a boundary
1918 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001919 if (ret == -3)
1920 ret = 0;
Daniel Veillard2644ab22005-08-24 14:22:55 +00001921 return (written? written : ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001922}
1923
1924/**
1925 * xmlCharEncOutFunc:
1926 * @handler: char enconding transformation data structure
1927 * @out: an xmlBuffer for the output.
1928 * @in: an xmlBuffer for the input
1929 *
1930 * Generic front-end for the encoding handler output function
1931 * a first call with @in == NULL has to be made firs to initiate the
1932 * output in case of non-stateless encoding needing to initiate their
1933 * state or the output (like the BOM in UTF16).
1934 * In case of UTF8 sequence conversion errors for the given encoder,
1935 * the content will be automatically remapped to a CharRef sequence.
1936 *
1937 * Returns the number of byte written if success, or
1938 * -1 general error
1939 * -2 if the transcoding fails (for *in is not valid utf8 string or
1940 * the result of transformation can't fit into the encoding we want), or
1941 */
1942int
1943xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1944 xmlBufferPtr in) {
1945 int ret = -2;
1946 int written;
1947 int writtentot = 0;
1948 int toconv;
1949 int output = 0;
1950
1951 if (handler == NULL) return(-1);
1952 if (out == NULL) return(-1);
1953
1954retry:
1955
1956 written = out->size - out->use;
1957
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001958 if (written > 0)
1959 written--; /* Gennady: count '/0' */
1960
Owen Taylor3473f882001-02-23 17:55:21 +00001961 /*
1962 * First specific handling of in = NULL, i.e. the initialization call
1963 */
1964 if (in == NULL) {
1965 toconv = 0;
1966 if (handler->output != NULL) {
1967 ret = handler->output(&out->content[out->use], &written,
1968 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001969 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001970 out->use += written;
1971 out->content[out->use] = 0;
1972 }
Owen Taylor3473f882001-02-23 17:55:21 +00001973 }
1974#ifdef LIBXML_ICONV_ENABLED
1975 else if (handler->iconv_out != NULL) {
1976 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1977 &written, NULL, &toconv);
1978 out->use += written;
1979 out->content[out->use] = 0;
1980 }
1981#endif /* LIBXML_ICONV_ENABLED */
1982#ifdef DEBUG_ENCODING
1983 xmlGenericError(xmlGenericErrorContext,
1984 "initialized encoder\n");
1985#endif
1986 return(0);
1987 }
1988
1989 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001990 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001991 */
1992 toconv = in->use;
1993 if (toconv == 0)
1994 return(0);
Daniel Veillardf1245392008-04-03 09:46:34 +00001995 if (toconv * 4 >= written) {
1996 xmlBufferGrow(out, toconv * 4);
Owen Taylor3473f882001-02-23 17:55:21 +00001997 written = out->size - out->use - 1;
1998 }
1999 if (handler->output != NULL) {
2000 ret = handler->output(&out->content[out->use], &written,
2001 in->content, &toconv);
2002 xmlBufferShrink(in, toconv);
2003 out->use += written;
2004 writtentot += written;
2005 out->content[out->use] = 0;
2006 }
2007#ifdef LIBXML_ICONV_ENABLED
2008 else if (handler->iconv_out != NULL) {
2009 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2010 &written, in->content, &toconv);
2011 xmlBufferShrink(in, toconv);
2012 out->use += written;
2013 writtentot += written;
2014 out->content[out->use] = 0;
2015 if (ret == -1) {
2016 if (written > 0) {
2017 /*
2018 * Can be a limitation of iconv
2019 */
2020 goto retry;
2021 }
2022 ret = -3;
2023 }
2024 }
2025#endif /* LIBXML_ICONV_ENABLED */
2026 else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002027 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2028 "xmlCharEncOutFunc: no output function !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002029 return(-1);
2030 }
2031
2032 if (ret >= 0) output += ret;
2033
2034 /*
2035 * Attempt to handle error cases
2036 */
2037 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002038 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002039#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002040 xmlGenericError(xmlGenericErrorContext,
2041 "converted %d bytes to %d bytes of output\n",
2042 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002043#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002044 break;
2045 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002046#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002047 xmlGenericError(xmlGenericErrorContext,
2048 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002049#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002050 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002051 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002052#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002053 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2054 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002055#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002056 break;
2057 case -2: {
2058 int len = in->use;
2059 const xmlChar *utf = (const xmlChar *) in->content;
2060 int cur;
2061
2062 cur = xmlGetUTF8Char(utf, &len);
2063 if (cur > 0) {
2064 xmlChar charref[20];
2065
2066#ifdef DEBUG_ENCODING
2067 xmlGenericError(xmlGenericErrorContext,
2068 "handling output conversion error\n");
2069 xmlGenericError(xmlGenericErrorContext,
2070 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2071 in->content[0], in->content[1],
2072 in->content[2], in->content[3]);
2073#endif
2074 /*
2075 * Removes the UTF8 sequence, and replace it by a charref
2076 * and continue the transcoding phase, hoping the error
2077 * did not mangle the encoder state.
2078 */
Daniel Veillard2e7598c2005-09-02 12:28:34 +00002079 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002080 xmlBufferShrink(in, len);
2081 xmlBufferAddHead(in, charref, -1);
2082
2083 goto retry;
2084 } else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002085 char buf[50];
2086
Daniel Veillard2e7598c2005-09-02 12:28:34 +00002087 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002088 in->content[0], in->content[1],
2089 in->content[2], in->content[3]);
2090 buf[49] = 0;
2091 xmlEncodingErr(XML_I18N_CONV_FAILED,
2092 "output conversion failed due to conv error, bytes %s\n",
2093 buf);
Daniel Veillarddf750622006-05-02 12:24:06 +00002094 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2095 in->content[0] = ' ';
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 break;
2098 }
2099 }
2100 return(ret);
2101}
2102
2103/**
2104 * xmlCharEncCloseFunc:
2105 * @handler: char enconding transformation data structure
2106 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002107 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002108 *
2109 * Returns 0 if success, or -1 in case of error
2110 */
2111int
2112xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2113 int ret = 0;
2114 if (handler == NULL) return(-1);
2115 if (handler->name == NULL) return(-1);
2116#ifdef LIBXML_ICONV_ENABLED
2117 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002118 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002119 * and the associated icon resources.
2120 */
2121 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2122 if (handler->name != NULL)
2123 xmlFree(handler->name);
2124 handler->name = NULL;
2125 if (handler->iconv_out != NULL) {
2126 if (iconv_close(handler->iconv_out))
2127 ret = -1;
2128 handler->iconv_out = NULL;
2129 }
2130 if (handler->iconv_in != NULL) {
2131 if (iconv_close(handler->iconv_in))
2132 ret = -1;
2133 handler->iconv_in = NULL;
2134 }
2135 xmlFree(handler);
2136 }
2137#endif /* LIBXML_ICONV_ENABLED */
2138#ifdef DEBUG_ENCODING
2139 if (ret)
2140 xmlGenericError(xmlGenericErrorContext,
2141 "failed to close the encoding handler\n");
2142 else
2143 xmlGenericError(xmlGenericErrorContext,
2144 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002145#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 return(ret);
2148}
2149
Daniel Veillard36711902004-02-11 13:25:26 +00002150/**
2151 * xmlByteConsumed:
2152 * @ctxt: an XML parser context
2153 *
2154 * This function provides the current index of the parser relative
2155 * to the start of the current entity. This function is computed in
2156 * bytes from the beginning starting at zero and finishing at the
2157 * size in byte of the file if parsing a file. The function is
2158 * of constant cost if the input is UTF-8 but can be costly if run
2159 * on non-UTF-8 input.
2160 *
2161 * Returns the index in bytes from the beginning of the entity or -1
2162 * in case the index could not be computed.
2163 */
2164long
2165xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2166 xmlParserInputPtr in;
2167
2168 if (ctxt == NULL) return(-1);
2169 in = ctxt->input;
2170 if (in == NULL) return(-1);
2171 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2172 unsigned int unused = 0;
2173 xmlCharEncodingHandler * handler = in->buf->encoder;
2174 /*
2175 * Encoding conversion, compute the number of unused original
2176 * bytes from the input not consumed and substract that from
2177 * the raw consumed value, this is not a cheap operation
2178 */
2179 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002180 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002181 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002182 int toconv = in->end - in->cur, written = 32000;
2183
2184 int ret;
2185
2186 if (handler->output != NULL) {
2187 do {
2188 toconv = in->end - cur;
2189 written = 32000;
2190 ret = handler->output(&convbuf[0], &written,
2191 cur, &toconv);
2192 if (ret == -1) return(-1);
2193 unused += written;
2194 cur += toconv;
2195 } while (ret == -2);
2196#ifdef LIBXML_ICONV_ENABLED
2197 } else if (handler->iconv_out != NULL) {
2198 do {
2199 toconv = in->end - cur;
2200 written = 32000;
2201 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2202 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002203 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002204 if (written > 0)
2205 ret = -2;
2206 else
2207 return(-1);
2208 }
2209 unused += written;
2210 cur += toconv;
2211 } while (ret == -2);
2212#endif
2213 } else {
2214 /* could not find a converter */
2215 return(-1);
2216 }
2217 }
2218 if (in->buf->rawconsumed < unused)
2219 return(-1);
2220 return(in->buf->rawconsumed - unused);
2221 }
2222 return(in->consumed + (in->cur - in->base));
2223}
2224
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002225#ifndef LIBXML_ICONV_ENABLED
2226#ifdef LIBXML_ISO8859X_ENABLED
2227
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops, without error, as soon as @out
 * is full.
 *
 * @xlattable starts with a 48 byte index (32 entries selected by the lead
 * byte of 2 bytes sequences, then 16 entries selected by the lead byte of
 * 3 bytes sequences) followed by blocks of 64 bytes; a 0 found in a block
 * means the character is not part of the target character set.
 *
 * Returns the number of bytes written if success, 0 if @in is NULL
 *     (initialization), -2 if the transcoding fails, or -1 if another
 *     argument is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     on -2 it is the offset of the sequence which could not be converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* seq = in; /* start of the sequence being decoded */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
	return(-1);
    if (in == NULL) {
        /*
	 * initialization nothing to do
	 */
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every input sequence yields exactly one output byte */
        if (out >= outend)
            break;
        seq = in;
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto reject;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto reject;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto reject;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto reject;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto reject;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto reject;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto reject;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto reject;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto reject;
        }
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);

reject:
    /* report only what precedes the sequence which was refused */
    *outlen = out - outstart;
    *inlen = seq - instart;
    return(-2);
}
2344
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points of the bytes 0x80 to 0xFF, indexed by
 *                 (byte - 0x80), 0 meaning that the byte is undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops, without error, before the first
 * char whose UTF-8 sequence does not fit in the room left in @out.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or if an undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
	return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    /* @in is only dereferenced once known to be below @inend */
    while ((in < inend) && (out < outend)) {
        c = *in;
        if (c < 0x80) {
            *out++ = c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                /* 2 bytes sequence, stop if it does not fit */
                if (outend - out < 2)
                    break;
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* 3 bytes sequence, stop if it does not fit */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2411
2412
2413/************************************************************************
2414 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2415 ************************************************************************/
2416
/*
 * 16-bit values for the ISO-8859-2 bytes 0x80..0xFF, one entry per byte
 * (entry i stands for byte 0x80 + i, 128 entries).
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2435
/*
 * Byte table declared as a 48 byte header followed by 6 blocks of 64 bytes,
 * each row below holding 16 bytes.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as @xlattable, which
 * reads the header as block selectors and takes a 0 in a block as "not in
 * character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2465
/*
 * 16-bit values for the ISO-8859-3 bytes 0x80..0xFF, one entry per byte
 * (entry i stands for byte 0x80 + i, 128 entries). Some entries are 0x0000.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and rejects a 0 entry as an
 * undefined code point -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2484
/*
 * Byte table declared as a 48 byte header followed by 7 blocks of 64 bytes,
 * each row below holding 16 bytes.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as @xlattable, which
 * reads the header as block selectors and takes a 0 in a block as "not in
 * character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2518
/*
 * 16-bit values for the ISO-8859-4 bytes 0x80..0xFF, one entry per byte
 * (entry i stands for byte 0x80 + i, 128 entries).
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) -- confirm at the call site.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2537
/*
 * Byte table declared as a 48 byte header followed by 6 blocks of 64 bytes,
 * each row below holding 16 bytes.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as @xlattable, which
 * reads the header as block selectors and takes a 0 in a block as "not in
 * character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2567
/*
 * xmlunicodetable_ISO8859_5:
 *
 * 128 16-bit values for the ISO-8859-5 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; most of
 * the rest fall in the 0x0401-0x045f range, plus 0x00a0, 0x00ad,
 * 0x2116 and 0x00a7.
 * NOTE(review): presumably indexed by (byte - 0x80) when decoding to
 * Unicode -- the lookup code is outside this chunk, confirm there.
 */
2568static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2569 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2570 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2571 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2572 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2573 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2574 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2575 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2576 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2577 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2578 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2579 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2580 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2581 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2582 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2583 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2584 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2585};
2586
/*
 * xmltranscodetable_ISO8859_5:
 *
 * Byte table laid out as a 48-byte header followed by 6 blocks of 64
 * bytes. The string literal supplies exactly 48 + 6 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-5) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 */
2587static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2588 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2589 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2590 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2595 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2596 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2597 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2599 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2600 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2601 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2602 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2603 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2604 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2605 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2606 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2607 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2612 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2615};
2616
/*
 * xmlunicodetable_ISO8859_6:
 *
 * 128 16-bit values for the ISO-8859-6 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; the
 * populated entries after that are 0x00a0, 0x00a4, 0x00ad and values
 * in the 0x060c-0x0652 range.
 * NOTE(review): the many 0x0000 entries presumably mark bytes with no
 * assigned character, and the table is presumably indexed by
 * (byte - 0x80) -- the lookup code is outside this chunk, confirm there.
 */
2617static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2618 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2619 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2620 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2621 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2622 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2623 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2624 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2625 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2626 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2627 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2628 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2629 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2630 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2631 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2632 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2633 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2634};
2635
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte table laid out as a 48-byte header followed by 5 blocks of 64
 * bytes. The string literal supplies exactly 48 + 5 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-6) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 * NOTE(review): header byte 0 is 0x02 and the third 64-byte block
 * starts with 0xff; this looks like a generated reverse entry for the
 * 0x0000 placeholders of the forward table -- confirm the consumer can
 * never reach it with real input.
 */
2636static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2637 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2638 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2645 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2646 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2648 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2649 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2654 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2655 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2656 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2657 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2658 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2660};
2661
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 16-bit values for the ISO-8859-7 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; the rest
 * mix 0x00xx values, 0x0384-0x03ce values and 0x2015/0x2018/0x2019.
 * NOTE(review): the 0x0000 entries presumably mark bytes with no
 * assigned character, and the table is presumably indexed by
 * (byte - 0x80) -- the lookup code is outside this chunk, confirm there.
 */
2662static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2663 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2664 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2665 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2666 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2667 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2668 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2669 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2670 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2671 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2672 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2673 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2674 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2675 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2676 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2677 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2678 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2679};
2680
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table laid out as a 48-byte header followed by 7 blocks of 64
 * bytes. The string literal supplies exactly 48 + 7 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-7) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 * NOTE(review): header byte 0 is 0x04 and the fifth 64-byte block
 * starts with 0xff; this looks like a generated reverse entry for the
 * 0x0000 placeholders of the forward table -- confirm the consumer can
 * never reach it with real input.
 */
2681static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2682 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2684 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2689 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2690 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2691 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2692 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2693 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2698 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2701 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2703 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2706 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2707 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2708 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2709 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2713};
2714
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 16-bit values for the ISO-8859-8 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; the rest
 * mix 0x00xx values, 0x05d0-0x05ea values and 0x2017/0x200e/0x200f.
 * NOTE(review): the 0x0000 entries presumably mark bytes with no
 * assigned character, and the table is presumably indexed by
 * (byte - 0x80) -- the lookup code is outside this chunk, confirm there.
 */
2715static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2716 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2717 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2718 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2719 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2720 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2721 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2722 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2723 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2724 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2725 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2726 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2727 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2728 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2729 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2730 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2731 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2732};
2733
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table laid out as a 48-byte header followed by 7 blocks of 64
 * bytes. The string literal supplies exactly 48 + 7 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-8) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 * NOTE(review): header byte 0 is 0x02 and the third 64-byte block
 * starts with 0xff; this looks like a generated reverse entry for the
 * 0x0000 placeholders of the forward table -- confirm the consumer can
 * never reach it with real input.
 */
2734static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2735 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2736 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2737 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2742 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2743 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2744 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2745 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2746 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2751 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2753 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2754 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2759 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2761 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2763 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2764 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2765 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2766};
2767
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 16-bit values for the ISO-8859-9 charset (per the table name).
 * Every entry equals 0x0080 + its index except six: 0x011e, 0x0130,
 * 0x015e, 0x011f, 0x0131 and 0x015f.
 * NOTE(review): presumably indexed by (byte - 0x80) when decoding to
 * Unicode -- the lookup code is outside this chunk, confirm there.
 */
2768static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2769 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2770 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2771 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2772 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2773 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2774 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2775 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2776 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2777 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2778 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2779 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2780 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2781 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2782 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2783 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2784 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2785};
2786
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table laid out as a 48-byte header followed by 5 blocks of 64
 * bytes. The string literal supplies exactly 48 + 5 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-9) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 */
2787static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2788 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2795 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2796 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2797 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2798 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2799 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2800 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2801 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2802 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811};
2812
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values for the ISO-8859-10 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; the rest
 * mix 0x00xx values, 0x01xx values and 0x2015.
 * NOTE(review): presumably indexed by (byte - 0x80) when decoding to
 * Unicode -- the lookup code is outside this chunk, confirm there.
 */
2813static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2814 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2815 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2816 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2817 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2818 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2819 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2820 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2821 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2822 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2823 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2824 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2825 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2826 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2827 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2828 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2829 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2830};
2831
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table laid out as a 48-byte header followed by 7 blocks of 64
 * bytes. The string literal supplies exactly 48 + 7 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-10) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 */
2832static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2833 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2835 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2841 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2842 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2843 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2844 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2845 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2846 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2847 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2848 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2851 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2852 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2861 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2862 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2863 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2864};
2865
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values for the ISO-8859-11 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f, followed
 * by 0x00a0 and values in the 0x0e01-0x0e5b range.
 * NOTE(review): the 0x0000 entries presumably mark bytes with no
 * assigned character, and the table is presumably indexed by
 * (byte - 0x80) -- the lookup code is outside this chunk, confirm there.
 */
2866static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2867 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2868 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2869 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2870 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2871 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2872 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2873 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2874 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2875 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2876 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2877 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2878 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2879 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2880 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2881 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2882 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2883};
2884
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table laid out as a 48-byte header followed by 6 blocks of 64
 * bytes. The string literal supplies exactly 48 + 6 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-11) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 * NOTE(review): header byte 0 is 0x04 and the fifth 64-byte block
 * starts with 0xff; this looks like a generated reverse entry for the
 * 0x0000 placeholders of the forward table -- confirm the consumer can
 * never reach it with real input.
 */
2885static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2886 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2894 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2895 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2900 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2901 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2902 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2903 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2904 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2905 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2910 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2911 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2912 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2913};
2914
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values for the ISO-8859-13 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; the rest
 * mix 0x00xx values, 0x01xx values and 0x2019/0x201c/0x201d/0x201e.
 * NOTE(review): presumably indexed by (byte - 0x80) when decoding to
 * Unicode -- the lookup code is outside this chunk, confirm there.
 */
2915static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2916 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2917 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2918 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2919 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2920 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2921 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2922 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2923 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2924 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2925 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2926 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2927 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2928 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2929 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2930 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2931 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2932};
2933
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table laid out as a 48-byte header followed by 7 blocks of 64
 * bytes. The string literal supplies exactly 48 + 7 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-13) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 */
2934static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2935 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2943 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2944 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2945 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2946 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2948 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2956 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2958 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2960 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2961 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2962 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2963 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2964 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2965 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2966};
2967
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values for the ISO-8859-14 charset (per the table name).
 * The first 32 entries are the identity values 0x0080-0x009f; the rest
 * mix 0x00xx values, 0x01xx values and values in the 0x1e02-0x1ef3
 * range.
 * NOTE(review): presumably indexed by (byte - 0x80) when decoding to
 * Unicode -- the lookup code is outside this chunk, confirm there.
 */
2968static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2969 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2970 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2971 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2972 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2973 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2974 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2975 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2976 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2977 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2978 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2979 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2980 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2981 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2982 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2983 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2984 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2985};
2986
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table laid out as a 48-byte header followed by 10 blocks of 64
 * bytes. The string literal supplies exactly 48 + 10 * 64 bytes, so the
 * array holds no terminating NUL (valid in C; do not treat it as a
 * C string).
 * NOTE(review): looks like the reverse (Unicode -> ISO-8859-14) lookup,
 * with header bytes selecting a 64-byte block and 0x00 meaning "no
 * mapping" -- the consumer is not visible in this chunk, verify there.
 */
2987static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2988 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2996 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2997 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2998 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3003 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3004 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3005 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3017 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3018 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3021 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3023 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3028 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3029 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3030 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3031};
3032
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_15ToUTF8().
 * NOTE(review): the helper is outside this excerpt; presumably entry i is
 * the Unicode code point for input byte 0x80 + i -- confirm there.
 * Every entry equals 0x0080 + index except the eight at indexes 0x24, 0x26,
 * 0x28, 0x34, 0x38, 0x3c, 0x3d and 0x3e (0x20ac, 0x0160, 0x0161, 0x017d,
 * 0x017e, 0x0152, 0x0153, 0x0178).
 */
3033static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3034 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3035 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3036 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3037 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3038 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3039 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3040 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3041 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3042 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3043 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3044 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3045 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3046 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3047 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3048 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3049 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3050};
3051
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_15().  The
 * initializer supplies exactly 48 + 6 * 64 = 432 bytes (27 rows of 16), so
 * the array has no room for the string literal's terminating NUL, which C
 * permits for char arrays.
 * NOTE(review): the "48 + N * 64" sizing suggests a 48-byte index followed
 * by N 64-byte blocks, with 0x00 meaning "no mapping"; the lookup code is
 * outside this excerpt -- confirm against UTF8ToISO8859x().
 */
3052static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3053 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3054 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3055 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3056 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3061 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3062 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3063 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3064 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3071 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3076 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3077 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3078 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3079 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3080};
3081
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values handed to ISO8859xToUTF8() by ISO8859_16ToUTF8().
 * NOTE(review): the helper is outside this excerpt; presumably entry i is
 * the Unicode code point for input byte 0x80 + i -- confirm there.
 * Indexes 0x00 .. 0x20 hold 0x0080 + index; from index 0x21 onward the
 * entries mix such identity values with other code points (e.g. 0x0104,
 * 0x20ac, 0x0218).
 */
3082static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3083 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3084 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3085 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3086 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3087 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3088 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3089 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3090 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3091 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3092 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3093 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3094 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3095 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3096 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3097 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3098 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3099};
3100
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_16().  The
 * initializer supplies exactly 48 + 9 * 64 = 624 bytes (39 rows of 16), so
 * the array has no room for the string literal's terminating NUL, which C
 * permits for char arrays.
 * NOTE(review): the "48 + N * 64" sizing suggests a 48-byte index followed
 * by N 64-byte blocks, with 0x00 meaning "no mapping"; the lookup code is
 * outside this excerpt -- confirm against UTF8ToISO8859x().
 */
3101static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3102 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3103 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3110 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3111 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3112 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3113 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3114 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3115 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3119 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3121 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3138 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3139 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3140 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3141};
3142
3143
3144/*
3145 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3146 */
3147
3148static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3149 const unsigned char* in, int *inlen) {
3150 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3151}
3152static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3153 const unsigned char* in, int *inlen) {
3154 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3155}
3156
3157static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3158 const unsigned char* in, int *inlen) {
3159 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3160}
3161static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3162 const unsigned char* in, int *inlen) {
3163 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3164}
3165
3166static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3167 const unsigned char* in, int *inlen) {
3168 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3169}
3170static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3171 const unsigned char* in, int *inlen) {
3172 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3173}
3174
3175static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3176 const unsigned char* in, int *inlen) {
3177 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3178}
3179static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3180 const unsigned char* in, int *inlen) {
3181 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3182}
3183
3184static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3185 const unsigned char* in, int *inlen) {
3186 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3187}
3188static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3189 const unsigned char* in, int *inlen) {
3190 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3191}
3192
3193static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3194 const unsigned char* in, int *inlen) {
3195 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3196}
3197static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3198 const unsigned char* in, int *inlen) {
3199 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3200}
3201
3202static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3203 const unsigned char* in, int *inlen) {
3204 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3205}
3206static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3207 const unsigned char* in, int *inlen) {
3208 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3209}
3210
3211static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3212 const unsigned char* in, int *inlen) {
3213 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3214}
3215static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3216 const unsigned char* in, int *inlen) {
3217 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3218}
3219
3220static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3221 const unsigned char* in, int *inlen) {
3222 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3223}
3224static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3225 const unsigned char* in, int *inlen) {
3226 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3227}
3228
3229static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3230 const unsigned char* in, int *inlen) {
3231 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3232}
3233static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3234 const unsigned char* in, int *inlen) {
3235 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3236}
3237
3238static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3239 const unsigned char* in, int *inlen) {
3240 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3241}
3242static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3243 const unsigned char* in, int *inlen) {
3244 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3245}
3246
3247static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3248 const unsigned char* in, int *inlen) {
3249 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3250}
3251static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3252 const unsigned char* in, int *inlen) {
3253 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3254}
3255
3256static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3257 const unsigned char* in, int *inlen) {
3258 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3259}
3260static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3261 const unsigned char* in, int *inlen) {
3262 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3263}
3264
3265static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3266 const unsigned char* in, int *inlen) {
3267 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3268}
3269static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3270 const unsigned char* in, int *inlen) {
3271 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3272}
3273
3274static void
3275xmlRegisterCharEncodingHandlersISO8859x (void) {
3276 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3277 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3278 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3279 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3280 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3281 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3282 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3283 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3284 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3285 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3286 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3287 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3288 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3289 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3290}
3291
3292#endif
3293#endif
3294
Daniel Veillard5d4644e2005-04-01 13:11:58 +00003295#define bottom_encoding
3296#include "elfgcchack.h"
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003297