blob: cf45cba26367f433188e089f8079cdd35765ce5b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
Daniel Veillard97ac1312001-05-30 19:14:17 +000020 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000021 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * One entry of the encoding alias table: @alias is an alternate spelling
 * that maps to the encoding called @name. Both strings are released with
 * xmlFree() by xmlCleanupEncodingAliases().
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias, *xmlCharEncodingAliasPtr;

/* The alias table itself, the number of used slots and its capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
71static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every
 * accepted byte is copied unchanged and needs exactly one output byte.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or if a byte outside of the ASCII range (>= 0x80) is found.
 * When the lengths are updated, the value of @inlen after return is the
 * number of octets consumed and the value of @outlen is the number of
 * octets produced. On a NULL argument the lengths are left untouched.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* cur = in;
    const unsigned char* inend;
    unsigned char* dst = out;
    unsigned char* outend;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    inend = in + (*inlen);
    outend = out + (*outlen);

    /*
     * One input byte maps to one output byte, so the whole output buffer
     * can be used; conversion stops when either side is exhausted.
     */
    while ((cur < inend) && (dst < outend)) {
        if (*cur >= 0x80) {
            /* not ASCII: report what was converted before the bad byte */
            *outlen = (int) (dst - out);
            *inlen = (int) (cur - in);
            return(-1);
        }
        *dst++ = *cur++;
    }
    *outlen = (int) (dst - out);
    *inlen = (int) (cur - in);
    return(*outlen);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, 0 when called with a
 *     NULL @in (initialization), or -2 if the transcoding fails (malformed
 *     UTF-8 or a character which does not exist in ASCII).
 * The value of @inlen after return is the number of octets consumed,
 * counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
	 * initialization nothing to do
	 */
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
	d = *in++;
	if      (d < 0x80)  { c= d; trailing= 0; }
	else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
	else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
	else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
	else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
	else                goto failed;  /* no chance for this in Ascii */

	/* incomplete sequence at the end of the block: wait for more input */
	if (inend - in < trailing)
	    break;

	for ( ; trailing; trailing--) {
	    d = *in++;
	    /*
	     * A byte which is not a continuation byte makes the sequence
	     * malformed; it must not be swallowed into the character.
	     */
	    if ((d & 0xC0) != 0x80)
		goto failed;
	    c <<= 6;
	    c |= d & 0x3F;
	}

	/* assertion: c is a single UCS-4 value */
	if (c >= 0x80)
	    goto failed;                  /* no chance for this in Ascii */
	if (out >= outend)
	    break;
	*out++ = c;
	processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(*outlen);

failed:
    /* report only what was fully converted before the offending bytes */
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000214#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000215
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * expanded to a two byte UTF-8 sequence. Conversion stops as soon as the
 * next character does not fit in the remaining output space.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL.
 * When the lengths are updated, the value of @inlen after return is the
 * number of octets consumed and the value of @outlen is the number of
 * octets produced. On a NULL argument the lengths are left untouched.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* dst = out;
    unsigned char* outend;
    const unsigned char* src = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
	return(-1);

    outend = out + (*outlen);
    inend = in + (*inlen);

    while (src < inend) {
	c = *src;
	if (c < 0x80) {
	    if (dst >= outend)
		break;
	    *dst++ = c;
	} else {
	    /* never emit half of a two byte sequence */
	    if (outend - dst < 2)
		break;
	    *dst++ = ((c >> 6) & 0x1F) | 0xC0;
	    *dst++ = (c & 0x3F) | 0x80;
	}
	src++;
    }
    *outlen = (int) (dst - out);
    *inlen = (int) (src - in);
    return(*outlen);
}
260
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Identity "conversion" used for UTF-8: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb. The content is not checked.
 *
 * Returns the number of bytes copied, or -1 if an argument is NULL or
 *     if the number of bytes to copy is negative.
 * On success both *@outlen and *@inlenb are set to the number of bytes
 * copied; on error they are left untouched.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if (!out || !inb || !outlen || !inlenb)
	return(-1);

    /* bounded by both the available input and the available room */
    ncopy = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (ncopy < 0)
	return(-1);

    memcpy(out, inb, ncopy);

    *inlenb = ncopy;
    *outlen = ncopy;
    return(ncopy);
}
296
Daniel Veillarde72c7562002-05-31 09:47:30 +0000297
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000298#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, 0 when called with a
 *     NULL @in (initialization), or -2 if the transcoding fails (bad lead
 *     or continuation byte, or a character above 0xFF).
 * The value of @inlen after return is the number of octets consumed,
 * counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* src = in;
    const unsigned char* committed = in;
    const unsigned char* srcend;
    unsigned char* dst = out;
    const unsigned char* dstend;
    unsigned int lead, next, value;
    int extra;
    int failed = 0;

    if (in == NULL) {
	/* initialization call: nothing to emit */
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
	lead = *src++;

	/* the lead byte gives the payload bits and the sequence length */
	if (lead < 0x80) {
	    value = lead;
	    extra = 0;
	} else if ((lead >= 0xC0) && (lead < 0xE0)) {
	    value = lead & 0x1F;
	    extra = 1;
	} else if ((lead >= 0xE0) && (lead < 0xF0)) {
	    value = lead & 0x0F;
	    extra = 2;
	} else if ((lead >= 0xF0) && (lead < 0xF8)) {
	    value = lead & 0x07;
	    extra = 3;
	} else {
	    /* stray continuation byte, or a lead byte of 0xF8 and above */
	    failed = 1;
	    break;
	}

	/* sequence cut by the end of the block: stop without error */
	if (srcend - src < extra)
	    break;

	while (extra > 0) {
	    next = *src++;
	    if ((next & 0xC0) != 0x80) {
		failed = 1;
		break;
	    }
	    value = (value << 6) | (next & 0x3F);
	    extra--;
	}
	if (failed)
	    break;

	/* anything above 0xFF has no ISO Latin 1 equivalent */
	if (value > 0xFF) {
	    failed = 1;
	    break;
	}
	if (dst >= dstend)
	    break;
	*dst++ = value;
	committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    if (failed)
	return(-2);
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000386#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000387
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read one byte at a time, so the result
 * depends neither on the byte order of the host nor on the alignment
 * of @inb.
 *
 * A character is only emitted when its whole UTF-8 sequence fits in the
 * remaining output space, and a high surrogate which is the last code
 * unit of the block is left unconsumed so that the pair can be completed
 * by the next block.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed,
 * the value of *@outlen is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* processed = inb;
    unsigned int c, d;
    int need, bits;

    /* a dangling odd byte cannot form a code unit, leave it out */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
	c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
	in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
	    if (in >= inend)             /* pair split between two blocks */
		break;
	    d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
	    in += 2;
            if ((d & 0xFC00) != 0xDC00) {
		*outlen = (int) (out - outstart);
		*inlenb = (int) (processed - inb);
	        return(-2);
	    }
	    c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

	/* assertion: c is a single UCS-4 value */
	if      (c <    0x80) need = 1;
	else if (c <   0x800) need = 2;
	else if (c < 0x10000) need = 3;
	else                  need = 4;

	/* never leave a truncated UTF-8 sequence in the output */
	if (outend - out < need)
	    break;

        if      (need == 1) {  *out++=  c;                       bits= -6; }
        else if (need == 2) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (need == 3) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

	processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}
475
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000476#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is emitted. The output is
 * written one byte at a time, so the result depends neither on the byte
 * order of the host nor on the alignment of @outb.
 *
 * Returns the number of bytes written, 0 when called with a NULL @in
 *     (initialization), or -2 if the transcoding failed (malformed UTF-8
 *     or a value which cannot be represented in UTF-16).
 * The value of *@inlen after return is the number of octets consumed,
 * counting only completely converted characters; the value of *@outlen
 * is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if (in == NULL) {
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
      d= *in++;
      if      (d < 0x80)  { c= d; trailing= 0; }
      else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
      else                goto failed;  /* no chance for this in UTF-16 */

      /* incomplete sequence at the end of the block: wait for more input */
      if (inend - in < trailing)
          break;

      for ( ; trailing; trailing--) {
          d = *in++;
          /* a non continuation byte here means malformed input */
          if ((d & 0xC0) != 0x80)
	      goto failed;
          c <<= 6;
          c |= d & 0x3F;
      }

      /* assertion: c is a single UCS-4 value */
      if (c < 0x10000) {
          if (outend - out < 2)
	      break;
	  *out++ = (unsigned char) (c & 0xFF);
	  *out++ = (unsigned char) (c >> 8);
      }
      else if (c < 0x110000) {
          /* needs a surrogate pair, i.e. two code units */
          if (outend - out < 4)
	      break;
          c -= 0x10000;
	  high = 0xD800 | (c >> 10);
	  low = 0xDC00 | (c & 0x03FF);
	  *out++ = (unsigned char) (high & 0xFF);
	  *out++ = (unsigned char) (high >> 8);
	  *out++ = (unsigned char) (low & 0xFF);
	  *out++ = (unsigned char) (low >> 8);
      }
      else {
          /* beyond U+10FFFF: retrying can never succeed, so report it */
          goto failed;
      }
      processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

failed:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
583
584/**
William M. Brackf9415e42003-11-28 09:39:10 +0000585 * UTF8ToUTF16:
586 * @outb: a pointer to an array of bytes to store the result
587 * @outlen: the length of @outb
588 * @in: a pointer to an array of UTF-8 chars
589 * @inlen: the length of @in
590 *
591 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
592 * block of chars out.
593 *
594 * Returns the number of bytes written, or -1 if lack of space, or -2
595 * if the transcoding failed.
596 */
597static int
598UTF8ToUTF16(unsigned char* outb, int *outlen,
599 const unsigned char* in, int *inlen)
600{
601 if (in == NULL) {
602 /*
603 * initialization, add the Byte Order Mark for UTF-16LE
604 */
605 if (*outlen >= 2) {
606 outb[0] = 0xFF;
607 outb[1] = 0xFE;
608 *outlen = 2;
609 *inlen = 0;
610#ifdef DEBUG_ENCODING
611 xmlGenericError(xmlGenericErrorContext,
612 "Added FFFE Byte Order Mark\n");
613#endif
614 return(2);
615 }
616 *outlen = 0;
617 *inlen = 0;
618 return(0);
619 }
620 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
621}
William M. Brack030a7a12004-02-10 12:48:57 +0000622#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000623
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read one byte at a time, so the result
 * depends neither on the byte order of the host nor on the alignment
 * of @inb.
 *
 * A character is only emitted when its whole UTF-8 sequence fits in the
 * remaining output space, and a high surrogate which is the last code
 * unit of the block is left unconsumed so that the pair can be completed
 * by the next block.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed,
 * the value of *@outlen is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* processed = inb;
    unsigned int c, d;
    int need, bits;

    /* a dangling odd byte cannot form a code unit, leave it out */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
	c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
	in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
	    if (in >= inend)             /* pair split between two blocks */
		break;
	    d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
	    in += 2;
            if ((d & 0xFC00) != 0xDC00) {
		*outlen = (int) (out - outstart);
		*inlenb = (int) (processed - inb);
	        return(-2);
	    }
	    c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

	/* assertion: c is a single UCS-4 value */
	if      (c <    0x80) need = 1;
	else if (c <   0x800) need = 2;
	else if (c < 0x10000) need = 3;
	else                  need = 4;

	/*
	 * Stop before writing anything if the sequence does not fit:
	 * a truncated sequence would be invalid UTF-8 and the character
	 * must not be counted as consumed.
	 */
	if (outend - out < need)
	    break;

        if      (need == 1) {  *out++=  c;                       bits= -6; }
        else if (need == 2) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (need == 3) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

	processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}
715
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000716#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is emitted. The output is
 * written one byte at a time, so the result depends neither on the byte
 * order of the host nor on the alignment of @outb.
 *
 * Returns the number of bytes written, 0 when called with a NULL @in
 *     (initialization), or -2 if the transcoding failed (malformed UTF-8
 *     or a value which cannot be represented in UTF-16).
 * The value of *@inlen after return is the number of octets consumed,
 * counting only completely converted characters; the value of *@outlen
 * is the number of octets produced, on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;

    /* UTF-16BE has no BOM */
    if (in == NULL) {
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
      d= *in++;
      if      (d < 0x80)  { c= d; trailing= 0; }
      else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
      else                goto failed;  /* no chance for this in UTF-16 */

      /* incomplete sequence at the end of the block: wait for more input */
      if (inend - in < trailing)
          break;

      for ( ; trailing; trailing--) {
          d = *in++;
          /* a non continuation byte here means malformed input */
          if ((d & 0xC0) != 0x80)
	      goto failed;
          c <<= 6;
          c |= d & 0x3F;
      }

      /* assertion: c is a single UCS-4 value */
      if (c < 0x10000) {
          if (outend - out < 2)
	      break;
	  *out++ = (unsigned char) (c >> 8);
	  *out++ = (unsigned char) (c & 0xFF);
      }
      else if (c < 0x110000) {
          /* needs a surrogate pair, i.e. two code units */
          if (outend - out < 4)
	      break;
          c -= 0x10000;
	  high = 0xD800 | (c >> 10);
	  low = 0xDC00 | (c & 0x03FF);
	  *out++ = (unsigned char) (high >> 8);
	  *out++ = (unsigned char) (high & 0xFF);
	  *out++ = (unsigned char) (low >> 8);
	  *out++ = (unsigned char) (low & 0xFF);
      }
      else {
          /* beyond U+10FFFF: retrying can never succeed, so report it */
          goto failed;
      }
      processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

failed:
    /* lengths are always expressed in bytes, never in code units */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000820#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000821
Daniel Veillard97ac1312001-05-30 19:14:17 +0000822/************************************************************************
823 * *
824 * Generic encoding handling routines *
825 * *
826 ************************************************************************/
827
Owen Taylor3473f882001-02-23 17:55:21 +0000828/**
829 * xmlDetectCharEncoding:
830 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000831 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000832 * @len: pointer to the length of the buffer
833 *
834 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000835 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000836 *
837 * Returns one of the XML_CHAR_ENCODING_... values.
838 */
839xmlCharEncoding
840xmlDetectCharEncoding(const unsigned char* in, int len)
841{
842 if (len >= 4) {
843 if ((in[0] == 0x00) && (in[1] == 0x00) &&
844 (in[2] == 0x00) && (in[3] == 0x3C))
845 return(XML_CHAR_ENCODING_UCS4BE);
846 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
847 (in[2] == 0x00) && (in[3] == 0x00))
848 return(XML_CHAR_ENCODING_UCS4LE);
849 if ((in[0] == 0x00) && (in[1] == 0x00) &&
850 (in[2] == 0x3C) && (in[3] == 0x00))
851 return(XML_CHAR_ENCODING_UCS4_2143);
852 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
853 (in[2] == 0x00) && (in[3] == 0x00))
854 return(XML_CHAR_ENCODING_UCS4_3412);
855 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
856 (in[2] == 0xA7) && (in[3] == 0x94))
857 return(XML_CHAR_ENCODING_EBCDIC);
858 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
859 (in[2] == 0x78) && (in[3] == 0x6D))
860 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000861 /*
862 * Although not part of the recommendation, we also
863 * attempt an "auto-recognition" of UTF-16LE and
864 * UTF-16BE encodings.
865 */
866 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
867 (in[2] == 0x3F) && (in[3] == 0x00))
868 return(XML_CHAR_ENCODING_UTF16LE);
869 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
870 (in[2] == 0x00) && (in[3] == 0x3F))
871 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000872 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000873 if (len >= 3) {
874 /*
875 * Errata on XML-1.0 June 20 2001
876 * We now allow an UTF8 encoded BOM
877 */
878 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
879 (in[2] == 0xBF))
880 return(XML_CHAR_ENCODING_UTF8);
881 }
William M. Brackf9415e42003-11-28 09:39:10 +0000882 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000883 if (len >= 2) {
884 if ((in[0] == 0xFE) && (in[1] == 0xFF))
885 return(XML_CHAR_ENCODING_UTF16BE);
886 if ((in[0] == 0xFF) && (in[1] == 0xFE))
887 return(XML_CHAR_ENCODING_UTF16LE);
888 }
889 return(XML_CHAR_ENCODING_NONE);
890}
891
892/**
893 * xmlCleanupEncodingAliases:
894 *
895 * Unregisters all aliases
896 */
897void
898xmlCleanupEncodingAliases(void) {
899 int i;
900
901 if (xmlCharEncodingAliases == NULL)
902 return;
903
904 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
905 if (xmlCharEncodingAliases[i].name != NULL)
906 xmlFree((char *) xmlCharEncodingAliases[i].name);
907 if (xmlCharEncodingAliases[i].alias != NULL)
908 xmlFree((char *) xmlCharEncodingAliases[i].alias);
909 }
910 xmlCharEncodingAliasesNb = 0;
911 xmlCharEncodingAliasesMax = 0;
912 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000913 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000914}
915
916/**
917 * xmlGetEncodingAlias:
918 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
919 *
920 * Lookup an encoding name for the given alias.
921 *
William M. Brackf9415e42003-11-28 09:39:10 +0000922 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000923 */
924const char *
925xmlGetEncodingAlias(const char *alias) {
926 int i;
927 char upper[100];
928
929 if (alias == NULL)
930 return(NULL);
931
932 if (xmlCharEncodingAliases == NULL)
933 return(NULL);
934
935 for (i = 0;i < 99;i++) {
936 upper[i] = toupper(alias[i]);
937 if (upper[i] == 0) break;
938 }
939 upper[i] = 0;
940
941 /*
942 * Walk down the list looking for a definition of the alias
943 */
944 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
945 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
946 return(xmlCharEncodingAliases[i].name);
947 }
948 }
949 return(NULL);
950}
951
952/**
953 * xmlAddEncodingAlias:
954 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
955 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
956 *
William M. Brackf9415e42003-11-28 09:39:10 +0000957 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000958 * will be overwritten.
959 *
960 * Returns 0 in case of success, -1 in case of error
961 */
962int
963xmlAddEncodingAlias(const char *name, const char *alias) {
964 int i;
965 char upper[100];
966
967 if ((name == NULL) || (alias == NULL))
968 return(-1);
969
970 for (i = 0;i < 99;i++) {
971 upper[i] = toupper(alias[i]);
972 if (upper[i] == 0) break;
973 }
974 upper[i] = 0;
975
976 if (xmlCharEncodingAliases == NULL) {
977 xmlCharEncodingAliasesNb = 0;
978 xmlCharEncodingAliasesMax = 20;
979 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
980 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
981 if (xmlCharEncodingAliases == NULL)
982 return(-1);
983 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
984 xmlCharEncodingAliasesMax *= 2;
985 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
986 xmlRealloc(xmlCharEncodingAliases,
987 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
988 }
989 /*
990 * Walk down the list looking for a definition of the alias
991 */
992 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
993 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
994 /*
995 * Replace the definition.
996 */
997 xmlFree((char *) xmlCharEncodingAliases[i].name);
998 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
999 return(0);
1000 }
1001 }
1002 /*
1003 * Add the definition
1004 */
1005 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1006 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1007 xmlCharEncodingAliasesNb++;
1008 return(0);
1009}
1010
1011/**
1012 * xmlDelEncodingAlias:
1013 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1014 *
1015 * Unregisters an encoding alias @alias
1016 *
1017 * Returns 0 in case of success, -1 in case of error
1018 */
1019int
1020xmlDelEncodingAlias(const char *alias) {
1021 int i;
1022
1023 if (alias == NULL)
1024 return(-1);
1025
1026 if (xmlCharEncodingAliases == NULL)
1027 return(-1);
1028 /*
1029 * Walk down the list looking for a definition of the alias
1030 */
1031 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1032 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1033 xmlFree((char *) xmlCharEncodingAliases[i].name);
1034 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1035 xmlCharEncodingAliasesNb--;
1036 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1037 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1038 return(0);
1039 }
1040 }
1041 return(-1);
1042}
1043
1044/**
1045 * xmlParseCharEncoding:
1046 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1047 *
William M. Brackf9415e42003-11-28 09:39:10 +00001048 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001049 * that the comparison is case insensitive accordingly to the section
1050 * [XML] 4.3.3 Character Encoding in Entities.
1051 *
1052 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1053 * if not recognized.
1054 */
1055xmlCharEncoding
1056xmlParseCharEncoding(const char* name)
1057{
1058 const char *alias;
1059 char upper[500];
1060 int i;
1061
1062 if (name == NULL)
1063 return(XML_CHAR_ENCODING_NONE);
1064
1065 /*
1066 * Do the alias resolution
1067 */
1068 alias = xmlGetEncodingAlias(name);
1069 if (alias != NULL)
1070 name = alias;
1071
1072 for (i = 0;i < 499;i++) {
1073 upper[i] = toupper(name[i]);
1074 if (upper[i] == 0) break;
1075 }
1076 upper[i] = 0;
1077
1078 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1079 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1080 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1081
1082 /*
1083 * NOTE: if we were able to parse this, the endianness of UTF16 is
1084 * already found and in use
1085 */
1086 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1087 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1088
1089 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1090 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1091 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1092
1093 /*
1094 * NOTE: if we were able to parse this, the endianness of UCS4 is
1095 * already found and in use
1096 */
1097 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1098 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1099 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1100
1101
1102 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1103 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1104 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1105
1106 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1107 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1108 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1109
1110 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1111 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1112 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1113 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1114 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1115 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1116 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1117
1118 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1119 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1120 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1121
1122#ifdef DEBUG_ENCODING
1123 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1124#endif
1125 return(XML_CHAR_ENCODING_ERROR);
1126}
1127
1128/**
1129 * xmlGetCharEncodingName:
1130 * @enc: the encoding
1131 *
1132 * The "canonical" name for XML encoding.
1133 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1134 * Section 4.3.3 Character Encoding in Entities
1135 *
1136 * Returns the canonical name for the given encoding
1137 */
1138
1139const char*
1140xmlGetCharEncodingName(xmlCharEncoding enc) {
1141 switch (enc) {
1142 case XML_CHAR_ENCODING_ERROR:
1143 return(NULL);
1144 case XML_CHAR_ENCODING_NONE:
1145 return(NULL);
1146 case XML_CHAR_ENCODING_UTF8:
1147 return("UTF-8");
1148 case XML_CHAR_ENCODING_UTF16LE:
1149 return("UTF-16");
1150 case XML_CHAR_ENCODING_UTF16BE:
1151 return("UTF-16");
1152 case XML_CHAR_ENCODING_EBCDIC:
1153 return("EBCDIC");
1154 case XML_CHAR_ENCODING_UCS4LE:
1155 return("ISO-10646-UCS-4");
1156 case XML_CHAR_ENCODING_UCS4BE:
1157 return("ISO-10646-UCS-4");
1158 case XML_CHAR_ENCODING_UCS4_2143:
1159 return("ISO-10646-UCS-4");
1160 case XML_CHAR_ENCODING_UCS4_3412:
1161 return("ISO-10646-UCS-4");
1162 case XML_CHAR_ENCODING_UCS2:
1163 return("ISO-10646-UCS-2");
1164 case XML_CHAR_ENCODING_8859_1:
1165 return("ISO-8859-1");
1166 case XML_CHAR_ENCODING_8859_2:
1167 return("ISO-8859-2");
1168 case XML_CHAR_ENCODING_8859_3:
1169 return("ISO-8859-3");
1170 case XML_CHAR_ENCODING_8859_4:
1171 return("ISO-8859-4");
1172 case XML_CHAR_ENCODING_8859_5:
1173 return("ISO-8859-5");
1174 case XML_CHAR_ENCODING_8859_6:
1175 return("ISO-8859-6");
1176 case XML_CHAR_ENCODING_8859_7:
1177 return("ISO-8859-7");
1178 case XML_CHAR_ENCODING_8859_8:
1179 return("ISO-8859-8");
1180 case XML_CHAR_ENCODING_8859_9:
1181 return("ISO-8859-9");
1182 case XML_CHAR_ENCODING_2022_JP:
1183 return("ISO-2022-JP");
1184 case XML_CHAR_ENCODING_SHIFT_JIS:
1185 return("Shift-JIS");
1186 case XML_CHAR_ENCODING_EUC_JP:
1187 return("EUC-JP");
1188 case XML_CHAR_ENCODING_ASCII:
1189 return(NULL);
1190 }
1191 return(NULL);
1192}
1193
Daniel Veillard97ac1312001-05-30 19:14:17 +00001194/************************************************************************
1195 * *
1196 * Char encoding handlers *
1197 * *
1198 ************************************************************************/
1199
Owen Taylor3473f882001-02-23 17:55:21 +00001200
1201/* the size should be growable, but it's not a big deal ... */
1202#define MAX_ENCODING_HANDLERS 50
1203static xmlCharEncodingHandlerPtr *handlers = NULL;
1204static int nbCharEncodingHandler = 0;
1205
1206/*
1207 * The default is UTF-8 for XML, that's also the default used for the
1208 * parser internals, so the default encoding handler is NULL
1209 */
1210
1211static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1212
1213/**
1214 * xmlNewCharEncodingHandler:
1215 * @name: the encoding name, in UTF-8 format (ASCII actually)
1216 * @input: the xmlCharEncodingInputFunc to read that encoding
1217 * @output: the xmlCharEncodingOutputFunc to write that encoding
1218 *
1219 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001220 *
Owen Taylor3473f882001-02-23 17:55:21 +00001221 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1222 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001223xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001224xmlNewCharEncodingHandler(const char *name,
1225 xmlCharEncodingInputFunc input,
1226 xmlCharEncodingOutputFunc output) {
1227 xmlCharEncodingHandlerPtr handler;
1228 const char *alias;
1229 char upper[500];
1230 int i;
1231 char *up = 0;
1232
1233 /*
1234 * Do the alias resolution
1235 */
1236 alias = xmlGetEncodingAlias(name);
1237 if (alias != NULL)
1238 name = alias;
1239
1240 /*
1241 * Keep only the uppercase version of the encoding.
1242 */
1243 if (name == NULL) {
1244 xmlGenericError(xmlGenericErrorContext,
1245 "xmlNewCharEncodingHandler : no name !\n");
1246 return(NULL);
1247 }
1248 for (i = 0;i < 499;i++) {
1249 upper[i] = toupper(name[i]);
1250 if (upper[i] == 0) break;
1251 }
1252 upper[i] = 0;
1253 up = xmlMemStrdup(upper);
1254 if (up == NULL) {
1255 xmlGenericError(xmlGenericErrorContext,
1256 "xmlNewCharEncodingHandler : out of memory !\n");
1257 return(NULL);
1258 }
1259
1260 /*
1261 * allocate and fill-up an handler block.
1262 */
1263 handler = (xmlCharEncodingHandlerPtr)
1264 xmlMalloc(sizeof(xmlCharEncodingHandler));
1265 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001266 xmlFree(up);
Owen Taylor3473f882001-02-23 17:55:21 +00001267 xmlGenericError(xmlGenericErrorContext,
1268 "xmlNewCharEncodingHandler : out of memory !\n");
1269 return(NULL);
1270 }
1271 handler->input = input;
1272 handler->output = output;
1273 handler->name = up;
1274
1275#ifdef LIBXML_ICONV_ENABLED
1276 handler->iconv_in = NULL;
1277 handler->iconv_out = NULL;
1278#endif /* LIBXML_ICONV_ENABLED */
1279
1280 /*
1281 * registers and returns the handler.
1282 */
1283 xmlRegisterCharEncodingHandler(handler);
1284#ifdef DEBUG_ENCODING
1285 xmlGenericError(xmlGenericErrorContext,
1286 "Registered encoding handler for %s\n", name);
1287#endif
1288 return(handler);
1289}
1290
1291/**
1292 * xmlInitCharEncodingHandlers:
1293 *
1294 * Initialize the char encoding support, it registers the default
1295 * encoding supported.
1296 * NOTE: while public, this function usually doesn't need to be called
1297 * in normal processing.
1298 */
1299void
1300xmlInitCharEncodingHandlers(void) {
1301 unsigned short int tst = 0x1234;
1302 unsigned char *ptr = (unsigned char *) &tst;
1303
1304 if (handlers != NULL) return;
1305
1306 handlers = (xmlCharEncodingHandlerPtr *)
1307 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1308
1309 if (*ptr == 0x12) xmlLittleEndian = 0;
1310 else if (*ptr == 0x34) xmlLittleEndian = 1;
1311 else xmlGenericError(xmlGenericErrorContext,
1312 "Odd problem at endianness detection\n");
1313
1314 if (handlers == NULL) {
1315 xmlGenericError(xmlGenericErrorContext,
1316 "xmlInitCharEncodingHandlers : out of memory !\n");
1317 return;
1318 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001319 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001320#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001321 xmlUTF16LEHandler =
1322 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1323 xmlUTF16BEHandler =
1324 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001325 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001326 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1327 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001328 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001329#ifdef LIBXML_HTML_ENABLED
1330 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1331#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001332#else
1333 xmlUTF16LEHandler =
1334 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1335 xmlUTF16BEHandler =
1336 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001337 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001338 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1339 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1340 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1341#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001342#ifndef LIBXML_ICONV_ENABLED
1343#ifdef LIBXML_ISO8859X_ENABLED
1344 xmlRegisterCharEncodingHandlersISO8859x ();
1345#endif
1346#endif
1347
Owen Taylor3473f882001-02-23 17:55:21 +00001348}
1349
1350/**
1351 * xmlCleanupCharEncodingHandlers:
1352 *
1353 * Cleanup the memory allocated for the char encoding support, it
1354 * unregisters all the encoding handlers and the aliases.
1355 */
1356void
1357xmlCleanupCharEncodingHandlers(void) {
1358 xmlCleanupEncodingAliases();
1359
1360 if (handlers == NULL) return;
1361
1362 for (;nbCharEncodingHandler > 0;) {
1363 nbCharEncodingHandler--;
1364 if (handlers[nbCharEncodingHandler] != NULL) {
1365 if (handlers[nbCharEncodingHandler]->name != NULL)
1366 xmlFree(handlers[nbCharEncodingHandler]->name);
1367 xmlFree(handlers[nbCharEncodingHandler]);
1368 }
1369 }
1370 xmlFree(handlers);
1371 handlers = NULL;
1372 nbCharEncodingHandler = 0;
1373 xmlDefaultCharEncodingHandler = NULL;
1374}
1375
1376/**
1377 * xmlRegisterCharEncodingHandler:
1378 * @handler: the xmlCharEncodingHandlerPtr handler block
1379 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001380 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001381 */
1382void
1383xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1384 if (handlers == NULL) xmlInitCharEncodingHandlers();
1385 if (handler == NULL) {
1386 xmlGenericError(xmlGenericErrorContext,
1387 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1388 return;
1389 }
1390
1391 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1392 xmlGenericError(xmlGenericErrorContext,
1393 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1394 xmlGenericError(xmlGenericErrorContext,
1395 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1396 return;
1397 }
1398 handlers[nbCharEncodingHandler++] = handler;
1399}
1400
1401/**
1402 * xmlGetCharEncodingHandler:
1403 * @enc: an xmlCharEncoding value.
1404 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001405 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001406 *
1407 * Returns the handler or NULL if not found
1408 */
1409xmlCharEncodingHandlerPtr
1410xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1411 xmlCharEncodingHandlerPtr handler;
1412
1413 if (handlers == NULL) xmlInitCharEncodingHandlers();
1414 switch (enc) {
1415 case XML_CHAR_ENCODING_ERROR:
1416 return(NULL);
1417 case XML_CHAR_ENCODING_NONE:
1418 return(NULL);
1419 case XML_CHAR_ENCODING_UTF8:
1420 return(NULL);
1421 case XML_CHAR_ENCODING_UTF16LE:
1422 return(xmlUTF16LEHandler);
1423 case XML_CHAR_ENCODING_UTF16BE:
1424 return(xmlUTF16BEHandler);
1425 case XML_CHAR_ENCODING_EBCDIC:
1426 handler = xmlFindCharEncodingHandler("EBCDIC");
1427 if (handler != NULL) return(handler);
1428 handler = xmlFindCharEncodingHandler("ebcdic");
1429 if (handler != NULL) return(handler);
1430 break;
1431 case XML_CHAR_ENCODING_UCS4BE:
1432 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1433 if (handler != NULL) return(handler);
1434 handler = xmlFindCharEncodingHandler("UCS-4");
1435 if (handler != NULL) return(handler);
1436 handler = xmlFindCharEncodingHandler("UCS4");
1437 if (handler != NULL) return(handler);
1438 break;
1439 case XML_CHAR_ENCODING_UCS4LE:
1440 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1441 if (handler != NULL) return(handler);
1442 handler = xmlFindCharEncodingHandler("UCS-4");
1443 if (handler != NULL) return(handler);
1444 handler = xmlFindCharEncodingHandler("UCS4");
1445 if (handler != NULL) return(handler);
1446 break;
1447 case XML_CHAR_ENCODING_UCS4_2143:
1448 break;
1449 case XML_CHAR_ENCODING_UCS4_3412:
1450 break;
1451 case XML_CHAR_ENCODING_UCS2:
1452 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1453 if (handler != NULL) return(handler);
1454 handler = xmlFindCharEncodingHandler("UCS-2");
1455 if (handler != NULL) return(handler);
1456 handler = xmlFindCharEncodingHandler("UCS2");
1457 if (handler != NULL) return(handler);
1458 break;
1459
1460 /*
1461 * We used to keep ISO Latin encodings native in the
1462 * generated data. This led to so many problems that
1463 * this has been removed. One can still change this
1464 * back by registering no-ops encoders for those
1465 */
1466 case XML_CHAR_ENCODING_8859_1:
1467 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1468 if (handler != NULL) return(handler);
1469 break;
1470 case XML_CHAR_ENCODING_8859_2:
1471 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1472 if (handler != NULL) return(handler);
1473 break;
1474 case XML_CHAR_ENCODING_8859_3:
1475 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1476 if (handler != NULL) return(handler);
1477 break;
1478 case XML_CHAR_ENCODING_8859_4:
1479 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1480 if (handler != NULL) return(handler);
1481 break;
1482 case XML_CHAR_ENCODING_8859_5:
1483 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1484 if (handler != NULL) return(handler);
1485 break;
1486 case XML_CHAR_ENCODING_8859_6:
1487 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1488 if (handler != NULL) return(handler);
1489 break;
1490 case XML_CHAR_ENCODING_8859_7:
1491 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1492 if (handler != NULL) return(handler);
1493 break;
1494 case XML_CHAR_ENCODING_8859_8:
1495 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1496 if (handler != NULL) return(handler);
1497 break;
1498 case XML_CHAR_ENCODING_8859_9:
1499 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1500 if (handler != NULL) return(handler);
1501 break;
1502
1503
1504 case XML_CHAR_ENCODING_2022_JP:
1505 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1506 if (handler != NULL) return(handler);
1507 break;
1508 case XML_CHAR_ENCODING_SHIFT_JIS:
1509 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1510 if (handler != NULL) return(handler);
1511 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1512 if (handler != NULL) return(handler);
1513 handler = xmlFindCharEncodingHandler("Shift_JIS");
1514 if (handler != NULL) return(handler);
1515 break;
1516 case XML_CHAR_ENCODING_EUC_JP:
1517 handler = xmlFindCharEncodingHandler("EUC-JP");
1518 if (handler != NULL) return(handler);
1519 break;
1520 default:
1521 break;
1522 }
1523
1524#ifdef DEBUG_ENCODING
1525 xmlGenericError(xmlGenericErrorContext,
1526 "No handler found for encoding %d\n", enc);
1527#endif
1528 return(NULL);
1529}
1530
1531/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001532 * xmlFindCharEncodingHandler:
1533 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001534 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001535 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001536 *
1537 * Returns the handler or NULL if not found
1538 */
1539xmlCharEncodingHandlerPtr
1540xmlFindCharEncodingHandler(const char *name) {
1541 const char *nalias;
1542 const char *norig;
1543 xmlCharEncoding alias;
1544#ifdef LIBXML_ICONV_ENABLED
1545 xmlCharEncodingHandlerPtr enc;
1546 iconv_t icv_in, icv_out;
1547#endif /* LIBXML_ICONV_ENABLED */
1548 char upper[100];
1549 int i;
1550
1551 if (handlers == NULL) xmlInitCharEncodingHandlers();
1552 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1553 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1554
1555 /*
1556 * Do the alias resolution
1557 */
1558 norig = name;
1559 nalias = xmlGetEncodingAlias(name);
1560 if (nalias != NULL)
1561 name = nalias;
1562
1563 /*
1564 * Check first for directly registered encoding names
1565 */
1566 for (i = 0;i < 99;i++) {
1567 upper[i] = toupper(name[i]);
1568 if (upper[i] == 0) break;
1569 }
1570 upper[i] = 0;
1571
1572 for (i = 0;i < nbCharEncodingHandler; i++)
1573 if (!strcmp(upper, handlers[i]->name)) {
1574#ifdef DEBUG_ENCODING
1575 xmlGenericError(xmlGenericErrorContext,
1576 "Found registered handler for encoding %s\n", name);
1577#endif
1578 return(handlers[i]);
1579 }
1580
1581#ifdef LIBXML_ICONV_ENABLED
1582 /* check whether iconv can handle this */
1583 icv_in = iconv_open("UTF-8", name);
1584 icv_out = iconv_open(name, "UTF-8");
1585 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1586 enc = (xmlCharEncodingHandlerPtr)
1587 xmlMalloc(sizeof(xmlCharEncodingHandler));
1588 if (enc == NULL) {
1589 iconv_close(icv_in);
1590 iconv_close(icv_out);
1591 return(NULL);
1592 }
1593 enc->name = xmlMemStrdup(name);
1594 enc->input = NULL;
1595 enc->output = NULL;
1596 enc->iconv_in = icv_in;
1597 enc->iconv_out = icv_out;
1598#ifdef DEBUG_ENCODING
1599 xmlGenericError(xmlGenericErrorContext,
1600 "Found iconv handler for encoding %s\n", name);
1601#endif
1602 return enc;
1603 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1604 xmlGenericError(xmlGenericErrorContext,
1605 "iconv : problems with filters for '%s'\n", name);
1606 }
1607#endif /* LIBXML_ICONV_ENABLED */
1608
1609#ifdef DEBUG_ENCODING
1610 xmlGenericError(xmlGenericErrorContext,
1611 "No handler found for encoding %s\n", name);
1612#endif
1613
1614 /*
1615 * Fallback using the canonical names
1616 */
1617 alias = xmlParseCharEncoding(norig);
1618 if (alias != XML_CHAR_ENCODING_ERROR) {
1619 const char* canon;
1620 canon = xmlGetCharEncodingName(alias);
1621 if ((canon != NULL) && (strcmp(name, canon))) {
1622 return(xmlFindCharEncodingHandler(canon));
1623 }
1624 }
1625
William M. Brackf9415e42003-11-28 09:39:10 +00001626 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001627 return(NULL);
1628}
1629
Daniel Veillard97ac1312001-05-30 19:14:17 +00001630/************************************************************************
1631 * *
1632 * ICONV based generic conversion functions *
1633 * *
1634 ************************************************************************/
1635
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Thin wrapper mapping an iconv() call onto the int-based in/out
 * length convention used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (errno is E2BIG), or
 *     -2 if the transcoding fails, errno is EILSEQ (for *in is not valid
 *        utf8 string or the result of transformation can't fit into the
 *        encoding we want), or
 *     -3 if there the last byte can't form a single output char
 *        (errno is EINVAL), or for any other failure.
 *
 * The value of @inlen after return is the number of octets consumed
 * and the value of @outlen is the number of octets written to @out;
 * both are set to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    size_t icv_inlen = *inlen, icv_outlen = *outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    /* iconv() returns a size_t, (size_t) -1 signalling an error */
    size_t ret;

    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    if (in != NULL) {
        /* iconv() left the remaining byte counts in icv_inlen/icv_outlen */
        *inlen -= icv_inlen;
        *outlen -= icv_outlen;
    } else {
        *inlen = 0;
        *outlen = 0;
    }
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
#endif /* LIBXML_ICONV_ENABLED */
1696
Daniel Veillard97ac1312001-05-30 19:14:17 +00001697/************************************************************************
1698 * *
1699 * The real API used by libxml for on-the-fly conversion *
1700 * *
1701 ************************************************************************/
1702
Owen Taylor3473f882001-02-23 17:55:21 +00001703/**
1704 * xmlCharEncFirstLine:
1705 * @handler: char enconding transformation data structure
1706 * @out: an xmlBuffer for the output.
1707 * @in: an xmlBuffer for the input
1708 *
1709 * Front-end for the encoding handler input function, but handle only
1710 * the very first line, i.e. limit itself to 45 chars.
1711 *
1712 * Returns the number of byte written if success, or
1713 * -1 general error
1714 * -2 if the transcoding fails (for *in is not valid utf8 string or
1715 * the result of transformation can't fit into the encoding we want), or
1716 */
1717int
1718xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1719 xmlBufferPtr in) {
1720 int ret = -2;
1721 int written;
1722 int toconv;
1723
1724 if (handler == NULL) return(-1);
1725 if (out == NULL) return(-1);
1726 if (in == NULL) return(-1);
1727
1728 written = out->size - out->use;
1729 toconv = in->use;
1730 if (toconv * 2 >= written) {
1731 xmlBufferGrow(out, toconv);
1732 written = out->size - out->use - 1;
1733 }
1734
1735 /*
1736 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1737 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001738 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001739 */
1740 written = 45;
1741
1742 if (handler->input != NULL) {
1743 ret = handler->input(&out->content[out->use], &written,
1744 in->content, &toconv);
1745 xmlBufferShrink(in, toconv);
1746 out->use += written;
1747 out->content[out->use] = 0;
1748 }
1749#ifdef LIBXML_ICONV_ENABLED
1750 else if (handler->iconv_in != NULL) {
1751 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1752 &written, in->content, &toconv);
1753 xmlBufferShrink(in, toconv);
1754 out->use += written;
1755 out->content[out->use] = 0;
1756 if (ret == -1) ret = -3;
1757 }
1758#endif /* LIBXML_ICONV_ENABLED */
1759#ifdef DEBUG_ENCODING
1760 switch (ret) {
1761 case 0:
1762 xmlGenericError(xmlGenericErrorContext,
1763 "converted %d bytes to %d bytes of input\n",
1764 toconv, written);
1765 break;
1766 case -1:
1767 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1768 toconv, written, in->use);
1769 break;
1770 case -2:
1771 xmlGenericError(xmlGenericErrorContext,
1772 "input conversion failed due to input error\n");
1773 break;
1774 case -3:
1775 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1776 toconv, written, in->use);
1777 break;
1778 default:
1779 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1780 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001781#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001782 /*
1783 * Ignore when input buffer is not on a boundary
1784 */
1785 if (ret == -3) ret = 0;
1786 if (ret == -1) ret = 0;
1787 return(ret);
1788}
1789
1790/**
1791 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001792 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001793 * @out: an xmlBuffer for the output.
1794 * @in: an xmlBuffer for the input
1795 *
1796 * Generic front-end for the encoding handler input function
1797 *
1798 * Returns the number of byte written if success, or
1799 * -1 general error
1800 * -2 if the transcoding fails (for *in is not valid utf8 string or
1801 * the result of transformation can't fit into the encoding we want), or
1802 */
1803int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001804xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1805 xmlBufferPtr in)
1806{
Owen Taylor3473f882001-02-23 17:55:21 +00001807 int ret = -2;
1808 int written;
1809 int toconv;
1810
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001811 if (handler == NULL)
1812 return (-1);
1813 if (out == NULL)
1814 return (-1);
1815 if (in == NULL)
1816 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001817
1818 toconv = in->use;
1819 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001820 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001821 written = out->size - out->use;
1822 if (toconv * 2 >= written) {
1823 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001824 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001825 }
1826 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001827 ret = handler->input(&out->content[out->use], &written,
1828 in->content, &toconv);
1829 xmlBufferShrink(in, toconv);
1830 out->use += written;
1831 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001832 }
1833#ifdef LIBXML_ICONV_ENABLED
1834 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001835 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1836 &written, in->content, &toconv);
1837 xmlBufferShrink(in, toconv);
1838 out->use += written;
1839 out->content[out->use] = 0;
1840 if (ret == -1)
1841 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001842 }
1843#endif /* LIBXML_ICONV_ENABLED */
1844 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001845 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001846#ifdef DEBUG_ENCODING
1847 xmlGenericError(xmlGenericErrorContext,
1848 "converted %d bytes to %d bytes of input\n",
1849 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001850#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001851 break;
1852 case -1:
1853#ifdef DEBUG_ENCODING
1854 xmlGenericError(xmlGenericErrorContext,
1855 "converted %d bytes to %d bytes of input, %d left\n",
1856 toconv, written, in->use);
1857#endif
1858 break;
1859 case -3:
1860#ifdef DEBUG_ENCODING
1861 xmlGenericError(xmlGenericErrorContext,
1862 "converted %d bytes to %d bytes of input, %d left\n",
1863 toconv, written, in->use);
1864#endif
1865 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001866 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001867 xmlGenericError(xmlGenericErrorContext,
1868 "input conversion failed due to input error\n");
1869 xmlGenericError(xmlGenericErrorContext,
1870 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1871 in->content[0], in->content[1],
1872 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001873 }
1874 /*
1875 * Ignore when input buffer is not on a boundary
1876 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001877 if (ret == -3)
1878 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001879 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001880}
1881
1882/**
1883 * xmlCharEncOutFunc:
1884 * @handler: char enconding transformation data structure
1885 * @out: an xmlBuffer for the output.
1886 * @in: an xmlBuffer for the input
1887 *
1888 * Generic front-end for the encoding handler output function
1889 * a first call with @in == NULL has to be made firs to initiate the
1890 * output in case of non-stateless encoding needing to initiate their
1891 * state or the output (like the BOM in UTF16).
1892 * In case of UTF8 sequence conversion errors for the given encoder,
1893 * the content will be automatically remapped to a CharRef sequence.
1894 *
1895 * Returns the number of byte written if success, or
1896 * -1 general error
1897 * -2 if the transcoding fails (for *in is not valid utf8 string or
1898 * the result of transformation can't fit into the encoding we want), or
1899 */
1900int
1901xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1902 xmlBufferPtr in) {
1903 int ret = -2;
1904 int written;
1905 int writtentot = 0;
1906 int toconv;
1907 int output = 0;
1908
1909 if (handler == NULL) return(-1);
1910 if (out == NULL) return(-1);
1911
1912retry:
1913
1914 written = out->size - out->use;
1915
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001916 if (written > 0)
1917 written--; /* Gennady: count '/0' */
1918
Owen Taylor3473f882001-02-23 17:55:21 +00001919 /*
1920 * First specific handling of in = NULL, i.e. the initialization call
1921 */
1922 if (in == NULL) {
1923 toconv = 0;
1924 if (handler->output != NULL) {
1925 ret = handler->output(&out->content[out->use], &written,
1926 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001927 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001928 out->use += written;
1929 out->content[out->use] = 0;
1930 }
Owen Taylor3473f882001-02-23 17:55:21 +00001931 }
1932#ifdef LIBXML_ICONV_ENABLED
1933 else if (handler->iconv_out != NULL) {
1934 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1935 &written, NULL, &toconv);
1936 out->use += written;
1937 out->content[out->use] = 0;
1938 }
1939#endif /* LIBXML_ICONV_ENABLED */
1940#ifdef DEBUG_ENCODING
1941 xmlGenericError(xmlGenericErrorContext,
1942 "initialized encoder\n");
1943#endif
1944 return(0);
1945 }
1946
1947 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001948 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001949 */
1950 toconv = in->use;
1951 if (toconv == 0)
1952 return(0);
1953 if (toconv * 2 >= written) {
1954 xmlBufferGrow(out, toconv * 2);
1955 written = out->size - out->use - 1;
1956 }
1957 if (handler->output != NULL) {
1958 ret = handler->output(&out->content[out->use], &written,
1959 in->content, &toconv);
1960 xmlBufferShrink(in, toconv);
1961 out->use += written;
1962 writtentot += written;
1963 out->content[out->use] = 0;
1964 }
1965#ifdef LIBXML_ICONV_ENABLED
1966 else if (handler->iconv_out != NULL) {
1967 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1968 &written, in->content, &toconv);
1969 xmlBufferShrink(in, toconv);
1970 out->use += written;
1971 writtentot += written;
1972 out->content[out->use] = 0;
1973 if (ret == -1) {
1974 if (written > 0) {
1975 /*
1976 * Can be a limitation of iconv
1977 */
1978 goto retry;
1979 }
1980 ret = -3;
1981 }
1982 }
1983#endif /* LIBXML_ICONV_ENABLED */
1984 else {
1985 xmlGenericError(xmlGenericErrorContext,
1986 "xmlCharEncOutFunc: no output function !\n");
1987 return(-1);
1988 }
1989
1990 if (ret >= 0) output += ret;
1991
1992 /*
1993 * Attempt to handle error cases
1994 */
1995 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001996 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001997#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00001998 xmlGenericError(xmlGenericErrorContext,
1999 "converted %d bytes to %d bytes of output\n",
2000 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002001#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002002 break;
2003 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002004#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002005 xmlGenericError(xmlGenericErrorContext,
2006 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002007#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002008 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002009 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002010#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002011 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2012 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002013#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002014 break;
2015 case -2: {
2016 int len = in->use;
2017 const xmlChar *utf = (const xmlChar *) in->content;
2018 int cur;
2019
2020 cur = xmlGetUTF8Char(utf, &len);
2021 if (cur > 0) {
2022 xmlChar charref[20];
2023
2024#ifdef DEBUG_ENCODING
2025 xmlGenericError(xmlGenericErrorContext,
2026 "handling output conversion error\n");
2027 xmlGenericError(xmlGenericErrorContext,
2028 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2029 in->content[0], in->content[1],
2030 in->content[2], in->content[3]);
2031#endif
2032 /*
2033 * Removes the UTF8 sequence, and replace it by a charref
2034 * and continue the transcoding phase, hoping the error
2035 * did not mangle the encoder state.
2036 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002037 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002038 xmlBufferShrink(in, len);
2039 xmlBufferAddHead(in, charref, -1);
2040
2041 goto retry;
2042 } else {
2043 xmlGenericError(xmlGenericErrorContext,
2044 "output conversion failed due to conv error\n");
2045 xmlGenericError(xmlGenericErrorContext,
2046 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2047 in->content[0], in->content[1],
2048 in->content[2], in->content[3]);
2049 in->content[0] = ' ';
2050 }
2051 break;
2052 }
2053 }
2054 return(ret);
2055}
2056
2057/**
2058 * xmlCharEncCloseFunc:
2059 * @handler: char enconding transformation data structure
2060 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002062 *
2063 * Returns 0 if success, or -1 in case of error
2064 */
2065int
2066xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2067 int ret = 0;
2068 if (handler == NULL) return(-1);
2069 if (handler->name == NULL) return(-1);
2070#ifdef LIBXML_ICONV_ENABLED
2071 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002072 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002073 * and the associated icon resources.
2074 */
2075 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2076 if (handler->name != NULL)
2077 xmlFree(handler->name);
2078 handler->name = NULL;
2079 if (handler->iconv_out != NULL) {
2080 if (iconv_close(handler->iconv_out))
2081 ret = -1;
2082 handler->iconv_out = NULL;
2083 }
2084 if (handler->iconv_in != NULL) {
2085 if (iconv_close(handler->iconv_in))
2086 ret = -1;
2087 handler->iconv_in = NULL;
2088 }
2089 xmlFree(handler);
2090 }
2091#endif /* LIBXML_ICONV_ENABLED */
2092#ifdef DEBUG_ENCODING
2093 if (ret)
2094 xmlGenericError(xmlGenericErrorContext,
2095 "failed to close the encoding handler\n");
2096 else
2097 xmlGenericError(xmlGenericErrorContext,
2098 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002099#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002100
Owen Taylor3473f882001-02-23 17:55:21 +00002101 return(ret);
2102}
2103
Daniel Veillard36711902004-02-11 13:25:26 +00002104/**
2105 * xmlByteConsumed:
2106 * @ctxt: an XML parser context
2107 *
2108 * This function provides the current index of the parser relative
2109 * to the start of the current entity. This function is computed in
2110 * bytes from the beginning starting at zero and finishing at the
2111 * size in byte of the file if parsing a file. The function is
2112 * of constant cost if the input is UTF-8 but can be costly if run
2113 * on non-UTF-8 input.
2114 *
2115 * Returns the index in bytes from the beginning of the entity or -1
2116 * in case the index could not be computed.
2117 */
2118long
2119xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2120 xmlParserInputPtr in;
2121
2122 if (ctxt == NULL) return(-1);
2123 in = ctxt->input;
2124 if (in == NULL) return(-1);
2125 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2126 unsigned int unused = 0;
2127 xmlCharEncodingHandler * handler = in->buf->encoder;
2128 /*
2129 * Encoding conversion, compute the number of unused original
2130 * bytes from the input not consumed and substract that from
2131 * the raw consumed value, this is not a cheap operation
2132 */
2133 if (in->end - in->cur > 0) {
2134 static unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002135 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002136 int toconv = in->end - in->cur, written = 32000;
2137
2138 int ret;
2139
2140 if (handler->output != NULL) {
2141 do {
2142 toconv = in->end - cur;
2143 written = 32000;
2144 ret = handler->output(&convbuf[0], &written,
2145 cur, &toconv);
2146 if (ret == -1) return(-1);
2147 unused += written;
2148 cur += toconv;
2149 } while (ret == -2);
2150#ifdef LIBXML_ICONV_ENABLED
2151 } else if (handler->iconv_out != NULL) {
2152 do {
2153 toconv = in->end - cur;
2154 written = 32000;
2155 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2156 &written, cur, &toconv);
2157 if (ret == -1) {
2158 if (written > 0)
2159 ret = -2;
2160 else
2161 return(-1);
2162 }
2163 unused += written;
2164 cur += toconv;
2165 } while (ret == -2);
2166#endif
2167 } else {
2168 /* could not find a converter */
2169 return(-1);
2170 }
2171 }
2172 if (in->buf->rawconsumed < unused)
2173 return(-1);
2174 return(in->buf->rawconsumed - unused);
2175 }
2176 return(in->consumed + (in->cur - in->base));
2177}
2178
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002179#ifndef LIBXML_ICONV_ENABLED
2180#ifdef LIBXML_ISO8859X_ENABLED
2181
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops early, without error, when
 * @out is full; the caller can resume from the reported @inlen.
 *
 * Returns the number of bytes written if success (0 for the @in == NULL
 *     initialization call), or -2 if the transcoding fails.
 * The value of @inlen after return is the number of octets consumed;
 *     on failure it stops right before the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* start of the sequence being decoded, i.e. end of consumed input */
    const unsigned char* processed = in;

    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    /* each input character yields exactly one output byte */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* lead byte selects a 64 entries block, trail byte the entry */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* three lookups: lead byte, then c1, then c2 */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report what was converted before the sequence that failed */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
2295
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 * @unicodetable: the code points of bytes 0x80..0xFF, 0 if undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops early, without error, as soon
 * as the next character does not fit completely in @out.
 *
 * Returns the number of bytes written if success (0 when @in is NULL),
 *     or -1 if an undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if (in == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            /* ASCII is copied unchanged */
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* table entries are 16 bits wide: 3 bytes at most */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >>  12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        /* the byte is consumed only once fully written out */
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2355
2356
2357/************************************************************************
2358 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2359 ************************************************************************/
2360
/*
 * ISO-8859-2 to Unicode: one 16-bit code point per byte value
 * 0x80..0xFF, indexed by (byte - 0x80), which is the lookup
 * ISO8859xToUTF8 performs on its unicodetable argument. That function
 * treats an entry of 0x0000 as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2379
/*
 * Unicode to ISO-8859-2, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument: 48 index bytes (entries 0..31 for the
 * lead byte of 2-byte sequences, 32..47 for the lead byte of 3-byte
 * sequences) followed by 6 blocks of 64 entries. A final entry of 0
 * means the character is not in the character set.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2409
/*
 * ISO-8859-3 to Unicode: one 16-bit code point per byte value
 * 0x80..0xFF, indexed by (byte - 0x80), which is the lookup
 * ISO8859xToUTF8 performs on its unicodetable argument. That function
 * treats an entry of 0x0000 as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2428
/*
 * Unicode to ISO-8859-3, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument: 48 index bytes (entries 0..31 for the
 * lead byte of 2-byte sequences, 32..47 for the lead byte of 3-byte
 * sequences) followed by 7 blocks of 64 entries. A final entry of 0
 * means the character is not in the character set.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2462
/*
 * ISO-8859-4 to Unicode: one 16-bit code point per byte value
 * 0x80..0xFF, indexed by (byte - 0x80), which is the lookup
 * ISO8859xToUTF8 performs on its unicodetable argument. That function
 * treats an entry of 0x0000 as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2481
/*
 * Unicode to ISO-8859-4, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument: 48 index bytes (entries 0..31 for the
 * lead byte of 2-byte sequences, 32..47 for the lead byte of 3-byte
 * sequences) followed by 6 blocks of 64 entries. A final entry of 0
 * means the character is not in the character set.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2511
/*
 * ISO-8859-5 to Unicode: one 16-bit code point per byte value
 * 0x80..0xFF, indexed by (byte - 0x80), which is the lookup
 * ISO8859xToUTF8 performs on its unicodetable argument. That function
 * treats an entry of 0x0000 as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2530
/*
 * Unicode to ISO-8859-5, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument: 48 index bytes (entries 0..31 for the
 * lead byte of 2-byte sequences, 32..47 for the lead byte of 3-byte
 * sequences) followed by 6 blocks of 64 entries. A final entry of 0
 * means the character is not in the character set.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2560
/*
 * ISO-8859-6 to Unicode: one 16-bit code point per byte value
 * 0x80..0xFF, indexed by (byte - 0x80), which is the lookup
 * ISO8859xToUTF8 performs on its unicodetable argument. That function
 * treats an entry of 0x0000 as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
2579
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte table for ISO-8859-6 declared as 48 + 5 * 64 = 368 bytes. The
 * initializer is 23 adjacent string literals of 16 bytes each, which is
 * exactly 368 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 5 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-6 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2580static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2581 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2582 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2584 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2585 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2586 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2587 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2588 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2589 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2590 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2592 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2595 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2596 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2597 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2598 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2599 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2600 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2601 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2602 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2603 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2604};
2605
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 16-bit values laid out 8 per row. The first 32 entries are
 * 0x0080..0x009f in order; the remaining rows mix 0x00xx values,
 * 0x2015/0x2018/0x2019, values in the 0x0384..0x03ce range and a few
 * 0x0000 fillers.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-7 byte 0x80 + i, with 0x0000 marking bytes that have no
 * assignment -- confirm against the ISO-8859-x to UTF-8 conversion
 * code, which is not visible in this part of the file.
 */
2606static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2607 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2608 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2609 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2610 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2611 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2612 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2613 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2614 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2615 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2616 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2617 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2618 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2619 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2620 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2621 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2622 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2623};
2624
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table for ISO-8859-7 declared as 48 + 7 * 64 = 496 bytes. The
 * initializer is 31 adjacent string literals of 16 bytes each, which is
 * exactly 496 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 7 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-7 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2625static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2626 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2628 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2633 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2634 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2635 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2636 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2637 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2642 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2645 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2646 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2649 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2650 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2651 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2652 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2653 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2656 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2657};
2658
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 16-bit values laid out 8 per row. The first 32 entries are
 * 0x0080..0x009f in order; the remaining rows hold 0x00xx values
 * (including 0x00d7 and 0x00f7), 0x2017, 0x200e, 0x200f, the run
 * 0x05d0..0x05ea and many 0x0000 fillers.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-8 byte 0x80 + i, with 0x0000 marking bytes that have no
 * assignment -- confirm against the ISO-8859-x to UTF-8 conversion
 * code, which is not visible in this part of the file.
 */
2659static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2660 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2661 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2662 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2663 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2664 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2665 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2666 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2667 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2668 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2669 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2670 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2671 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2672 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2673 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2674 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2675 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2676};
2677
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table for ISO-8859-8 declared as 48 + 7 * 64 = 496 bytes. The
 * initializer is 31 adjacent string literals of 16 bytes each, which is
 * exactly 496 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 7 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-8 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2678static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2679 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2681 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2684 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2686 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2687 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2688 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2689 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2690 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2698 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2703 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2707 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2708 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2710};
2711
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 16-bit values laid out 8 per row. Entry i equals 0x0080 + i
 * everywhere except six slots, which hold 0x011e, 0x0130, 0x015e,
 * 0x011f, 0x0131 and 0x015f. No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-9 byte 0x80 + i -- confirm against the ISO-8859-x to UTF-8
 * conversion code, which is not visible in this part of the file.
 */
2712static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2713 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2714 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2715 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2716 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2717 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2718 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2719 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2720 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2721 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2722 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2723 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2724 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2725 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2726 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2727 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2728 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2729};
2730
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table for ISO-8859-9 declared as 48 + 5 * 64 = 368 bytes. The
 * initializer is 23 adjacent string literals of 16 bytes each, which is
 * exactly 368 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 5 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-9 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2731static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2732 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2733 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2734 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2735 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2736 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2739 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2740 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2741 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2742 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2743 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2744 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2745 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2746 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2750 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2753 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2755};
2756
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values laid out 8 per row. The first 32 entries are
 * 0x0080..0x009f in order; the remaining rows mix 0x00xx values with
 * values in the 0x0100..0x017e range and a single 0x2015. No entry is
 * 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-10 byte 0x80 + i -- confirm against the ISO-8859-x to UTF-8
 * conversion code, which is not visible in this part of the file.
 */
2757static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2758 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2759 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2760 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2761 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2762 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2763 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2764 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2765 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2766 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2767 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2768 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2769 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2770 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2771 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2772 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2773 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2774};
2775
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table for ISO-8859-10 declared as 48 + 7 * 64 = 496 bytes. The
 * initializer is 31 adjacent string literals of 16 bytes each, which is
 * exactly 496 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 7 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-10 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2776static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2777 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2779 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2782 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2784 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2785 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2786 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2787 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2788 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2789 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2790 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2791 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2792 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2794 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2795 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2796 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2797 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2805 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2806 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2807 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2808};
2809
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values laid out 8 per row. The first 33 entries are
 * 0x0080..0x00a0 in order; the rest hold 0x0e01..0x0e3a, 0x0e3f and
 * 0x0e40..0x0e5b, with two runs of four 0x0000 fillers.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-11 byte 0x80 + i, with 0x0000 marking bytes that have no
 * assignment -- confirm against the ISO-8859-x to UTF-8 conversion
 * code, which is not visible in this part of the file.
 */
2810static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2811 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2812 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2813 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2814 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2815 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2816 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2817 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2818 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2819 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2820 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2821 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2822 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2823 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2824 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2825 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2826 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2827};
2828
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table for ISO-8859-11 declared as 48 + 6 * 64 = 432 bytes. The
 * initializer is 27 adjacent string literals of 16 bytes each, which is
 * exactly 432 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 6 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-11 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2829static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2830 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2833 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2835 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2837 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2838 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2839 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2842 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2843 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2844 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2845 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2846 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2847 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2848 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2849 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2854 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857};
2858
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values laid out 8 per row. The first 32 entries are
 * 0x0080..0x009f in order; the remaining rows mix 0x00xx values with
 * values in the 0x0100..0x017e range and 0x2019, 0x201c, 0x201d,
 * 0x201e. No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-13 byte 0x80 + i -- confirm against the ISO-8859-x to UTF-8
 * conversion code, which is not visible in this part of the file.
 */
2859static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2860 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2861 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2862 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2863 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2864 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2865 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2866 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2867 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2868 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2869 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2870 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2871 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2872 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2873 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2874 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2875 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2876};
2877
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table for ISO-8859-13 declared as 48 + 7 * 64 = 496 bytes. The
 * initializer is 31 adjacent string literals of 16 bytes each, which is
 * exactly 496 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 7 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-13 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2878static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2879 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2883 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2884 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2885 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2886 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2887 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2888 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2889 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2890 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2894 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2895 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2899 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2900 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2901 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2902 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2904 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2905 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2906 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2907 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2909 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2910};
2911
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values laid out 8 per row. The first 32 entries are
 * 0x0080..0x009f in order; the remaining rows mix 0x00xx values with
 * a few 0x01xx values and values in the 0x1e02..0x1ef3 range. No entry
 * is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-14 byte 0x80 + i -- confirm against the ISO-8859-x to UTF-8
 * conversion code, which is not visible in this part of the file.
 */
2912static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2913 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2914 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2915 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2916 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2917 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2918 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2919 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2920 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2921 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2922 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2923 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2924 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2925 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2926 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2927 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2928 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2929};
2930
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table for ISO-8859-14 declared as 48 + 10 * 64 = 688 bytes. The
 * initializer is 43 adjacent string literals of 16 bytes each, which is
 * exactly 688 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 10 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-14 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2931static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2932 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2935 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2940 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2941 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2942 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2945 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2946 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
2947 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
2948 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2964 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
2967 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2970 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2972 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
2973 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2974 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
2975};
2976
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values laid out 8 per row. Entry i equals 0x0080 + i
 * everywhere except eight slots, which hold 0x20ac, 0x0160, 0x0161,
 * 0x017d, 0x017e, 0x0152, 0x0153 and 0x0178. No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-15 byte 0x80 + i -- confirm against the ISO-8859-x to UTF-8
 * conversion code, which is not visible in this part of the file.
 */
2977static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
2978 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2979 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2980 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2981 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2982 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
2983 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2984 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
2985 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
2986 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2987 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2988 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2989 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2990 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2991 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2992 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2993 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
2994};
2995
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table for ISO-8859-15 declared as 48 + 6 * 64 = 432 bytes. The
 * initializer is 27 adjacent string literals of 16 bytes each, which is
 * exactly 432 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 6 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-15 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
2996static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
2997 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3003 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3004 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3005 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3006 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3007 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3008 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3017 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3018 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3020 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3021 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3022 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3023 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3024};
3025
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values laid out 8 per row. The first 32 entries are
 * 0x0080..0x009f in order; the remaining rows mix 0x00xx values with
 * values in the 0x0102..0x017e range, 0x0218..0x021b, 0x201d, 0x201e
 * and 0x20ac. No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for
 * ISO-8859-16 byte 0x80 + i -- confirm against the ISO-8859-x to UTF-8
 * conversion code, which is not visible in this part of the file.
 */
3026static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3027 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3028 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3029 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3030 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3031 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3032 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3033 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3034 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3035 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3036 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3037 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3038 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3039 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3040 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3041 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3042 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3043};
3044
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table for ISO-8859-16 declared as 48 + 9 * 64 = 624 bytes. The
 * initializer is 39 adjacent string literals of 16 bytes each, which is
 * exactly 624 bytes, so the literal's terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by 9 blocks of 64 output bytes, with 0x00 meaning "no
 * mapping"; presumably this serves the UTF-8 to ISO-8859-16 direction.
 * The lookup routine is not visible in this part of the file -- confirm
 * the layout there before editing any byte by hand.
 */
3045static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3046 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3051 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3054 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3055 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3056 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3057 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3058 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3063 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3065 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3079 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3080 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3081 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3082 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3083 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3084 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3085};
3086
3087
3088/*
3089 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3090 */
3091
3092static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3093 const unsigned char* in, int *inlen) {
3094 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3095}
3096static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3097 const unsigned char* in, int *inlen) {
3098 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3099}
3100
3101static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3102 const unsigned char* in, int *inlen) {
3103 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3104}
3105static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3106 const unsigned char* in, int *inlen) {
3107 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3108}
3109
3110static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3111 const unsigned char* in, int *inlen) {
3112 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3113}
3114static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3115 const unsigned char* in, int *inlen) {
3116 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3117}
3118
3119static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3120 const unsigned char* in, int *inlen) {
3121 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3122}
3123static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3124 const unsigned char* in, int *inlen) {
3125 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3126}
3127
3128static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3129 const unsigned char* in, int *inlen) {
3130 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3131}
3132static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3133 const unsigned char* in, int *inlen) {
3134 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3135}
3136
3137static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3138 const unsigned char* in, int *inlen) {
3139 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3140}
3141static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3142 const unsigned char* in, int *inlen) {
3143 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3144}
3145
3146static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3147 const unsigned char* in, int *inlen) {
3148 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3149}
3150static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3151 const unsigned char* in, int *inlen) {
3152 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3153}
3154
3155static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3156 const unsigned char* in, int *inlen) {
3157 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3158}
3159static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3160 const unsigned char* in, int *inlen) {
3161 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3162}
3163
3164static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3165 const unsigned char* in, int *inlen) {
3166 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3167}
3168static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3169 const unsigned char* in, int *inlen) {
3170 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3171}
3172
3173static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3174 const unsigned char* in, int *inlen) {
3175 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3176}
3177static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3178 const unsigned char* in, int *inlen) {
3179 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3180}
3181
3182static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3183 const unsigned char* in, int *inlen) {
3184 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3185}
3186static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3187 const unsigned char* in, int *inlen) {
3188 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3189}
3190
3191static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3192 const unsigned char* in, int *inlen) {
3193 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3194}
3195static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3196 const unsigned char* in, int *inlen) {
3197 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3198}
3199
3200static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3201 const unsigned char* in, int *inlen) {
3202 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3203}
3204static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3205 const unsigned char* in, int *inlen) {
3206 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3207}
3208
3209static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3210 const unsigned char* in, int *inlen) {
3211 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3212}
3213static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3214 const unsigned char* in, int *inlen) {
3215 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3216}
3217
3218static void
3219xmlRegisterCharEncodingHandlersISO8859x (void) {
3220 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3221 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3222 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3223 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3224 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3225 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3226 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3227 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3228 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3229 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3230 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3231 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3232 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3233 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3234}
3235
3236#endif
3237#endif
3238
3239