blob: 7ee072e440cebedc3e843a65d200240f31d49e57 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * One entry of the encoding alias table: @alias is the alternate spelling
 * and @name is the encoding name it stands for.
 */
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;

/*
 * The alias table itself: a heap array (NULL while empty), the number of
 * entries in use and the number of entries allocated.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
60
#if defined(LIBXML_ICONV_ENABLED)
/* Flip the "#if 0" below to 1 to compile in the encoding traces. */
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#elif defined(LIBXML_ISO8859X_ENABLED)
/* Without iconv the ISO-8859-x handlers are registered by this routine. */
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif

/*
 * Byte-order flag consulted by the UTF-16 converters; non-zero means the
 * host is little-endian. It is initialised to 1 here.
 * NOTE(review): presumably recomputed during library initialisation --
 * that code is not visible in this part of the file.
 */
static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Since every ASCII char is its own UTF-8 encoding
 * this is a checked copy: conversion stops at the first byte >= 0x80,
 * or when either buffer is exhausted.
 *
 * Returns 0 if success (possibly after converting only part of the input
 *     because @out is full), or -1 if a non-ASCII byte is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend = out + (*outlen);
    const unsigned char* inend = in + (*inlen);

    /*
     * One input byte yields exactly one output byte, so the only space
     * requirement is a single free octet; no extra slack is reserved.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = (int) (out - outstart);
            *inlen = (int) (in - base);
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(0);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an "initialization" call: nothing is produced and both
 * lengths are set to 0.
 *
 * Returns 0 if success (conversion may stop early when @out is full or
 *     when the input ends in the middle of a multi-byte sequence), or -2
 *     if the transcoding fails: malformed UTF-8 or a character that has
 *     no ASCII representation.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto failed;  /* no chance for this in Ascii */

        /* sequence cut by the end of the input: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte which is not 10xxxxxx here means the input is
             * malformed; fail instead of emitting a half-decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto failed;                  /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(0);

failed:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000214#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000215
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * expanded to a two-byte UTF-8 sequence. Conversion stops when the input
 * is exhausted or when the next character does not fit in @out; a
 * two-byte sequence is never split.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + (*outlen);
    inend = in + (*inlen);

    while (in < inend) {
        if (*in < 0x80) {
            if (out >= outend)
                break;
            *out++ = *in++;
        } else {
            /* needs two octets; compare distances rather than forming
             * outend - 1, which is invalid when @out is empty */
            if (outend - out < 2)
                break;
            *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(0);
}
260
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit, i.e. the
 * smaller of *@outlen and *@inlenb, are copied verbatim from @inb to @out.
 *
 * Returns 0 after the copy, or -1 if an argument is NULL or if the number
 *     of bytes to copy works out negative.
 * On success both *@outlen and *@inlenb are set to the number of bytes
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (out == NULL)
        return(-1);
    if (inb == NULL)
        return(-1);
    if ((outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy whichever is smaller: what is available or what fits */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(0);
}
296
Daniel Veillarde72c7562002-05-31 09:47:30 +0000297
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000298#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Calling with a NULL @in is the initialization case: there is nothing to
 * emit and both lengths are reset to 0.
 *
 * Returns 0 if success (conversion may stop early if @out is full or if
 *     the input ends inside a multi-byte sequence), -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF)
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;    /* read cursor */
    const unsigned char *done = in;   /* end of the last char fully converted */
    const unsigned char *srcend;
    unsigned char *dst = out;         /* write cursor */
    const unsigned char *dstend;
    unsigned int value, byte;
    int extra;
    int status = 0;

    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
        byte = *src++;
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            break;
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            value = byte & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }

        /* the sequence continues past the end of the input: stop here */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* value now holds one complete code point */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = value;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(status);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000386#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000387
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read one octet at a time, so @inb
 * needs no particular alignment and the result does not depend on the
 * byte order of the host.
 *
 * An odd *@inlenb is first rounded down to a whole number of code units.
 * Conversion stops, without error, when the next character does not fit
 * in @out or when the input ends between the two halves of a surrogate
 * pair; in both cases the pending character is left unconsumed.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 *     which is not followed by a low surrogate)
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend)             /* second half not there yet */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80)  need = 1;
        else if (c <   0x800)  need = 2;
        else if (c < 0x10000)  need = 3;
        else                   need = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = c;
                break;
            case 2:
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = ( c        & 0x3F) | 0x80;
                break;
            case 3:
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = ( c        & 0x3F) | 0x80;
                break;
            default:
                *out++ = ((c >> 18) & 0x07) | 0xF0;
                *out++ = ((c >> 12) & 0x3F) | 0x80;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = ( c        & 0x3F) | 0x80;
                break;
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(0);
}
475
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000476#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written one octet at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the host. No Byte Order Mark is generated.
 *
 * A NULL @in is the initialization call: both lengths are set to 0.
 * Conversion stops, without error, when the next character does not fit
 * in @outb, when the input ends inside a multi-byte sequence, or when a
 * decoded value is above 0x10FFFF.
 *
 * Returns 0 on success, or -2 if the transcoding failed (malformed UTF-8)
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto failed;  /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but 10xxxxxx here is malformed input */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        }
        else
            break;
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(0);

failed:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000583#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000584
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. Real input is handed over unchanged to
 * UTF8ToUTF16LE(); a NULL @in is the initialization call, for which the
 * UTF-16LE Byte Order Mark (0xFF 0xFE) is written if @outb can hold it.
 *
 * Returns, for a non-NULL @in, whatever UTF8ToUTF16LE() returns. For a
 *     NULL @in, 2 if the Byte Order Mark was written, 0 if there was no
 *     room for it.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* regular conversion: delegate to the little-endian converter */
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
623
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read one octet at a time, so @inb
 * needs no particular alignment and the result does not depend on the
 * byte order of the host.
 *
 * An odd *@inlenb is first rounded down to a whole number of code units.
 * Conversion stops, without error, when the next character does not fit
 * in @out or when the input ends between the two halves of a surrogate
 * pair; in both cases the pending character is left unconsumed, exactly
 * as UTF16LEToUTF8() does.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 *     which is not followed by a low surrogate)
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /*
             * The second half may simply not have been read yet; leave
             * the first half unconsumed rather than reporting an error.
             */
            if (in >= inend)
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80)  need = 1;
        else if (c <   0x800)  need = 2;
        else if (c < 0x10000)  need = 3;
        else                   need = 4;

        /*
         * Check the room for the whole sequence up front: writing part of
         * it and marking the input as consumed would corrupt the output.
         */
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = c;
                break;
            case 2:
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = ( c        & 0x3F) | 0x80;
                break;
            case 3:
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = ( c        & 0x3F) | 0x80;
                break;
            default:
                *out++ = ((c >> 18) & 0x07) | 0xF0;
                *out++ = ((c >> 12) & 0x3F) | 0x80;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = ( c        & 0x3F) | 0x80;
                break;
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(0);
}
715
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000716#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written one octet at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the host. No Byte Order Mark is generated.
 *
 * A NULL @in is the initialization call: both lengths are set to 0.
 * Conversion stops, without error, when the next character does not fit
 * in @outb, when the input ends inside a multi-byte sequence, or when a
 * decoded value is above 0x10FFFF.
 *
 * Returns 0 on success, or -2 if the transcoding failed (malformed UTF-8)
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced,
 * on the error path as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto failed;  /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but 10xxxxxx here is malformed input */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        else
            break;
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(0);

failed:
    /* out advances by octets, so this is already a byte count */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000820#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000821
Daniel Veillard97ac1312001-05-30 19:14:17 +0000822/************************************************************************
823 * *
824 * Generic encoding handling routines *
825 * *
826 ************************************************************************/
827
Owen Taylor3473f882001-02-23 17:55:21 +0000828/**
829 * xmlDetectCharEncoding:
830 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000831 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000832 * @len: pointer to the length of the buffer
833 *
834 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000835 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000836 *
837 * Returns one of the XML_CHAR_ENCODING_... values.
838 */
839xmlCharEncoding
840xmlDetectCharEncoding(const unsigned char* in, int len)
841{
842 if (len >= 4) {
843 if ((in[0] == 0x00) && (in[1] == 0x00) &&
844 (in[2] == 0x00) && (in[3] == 0x3C))
845 return(XML_CHAR_ENCODING_UCS4BE);
846 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
847 (in[2] == 0x00) && (in[3] == 0x00))
848 return(XML_CHAR_ENCODING_UCS4LE);
849 if ((in[0] == 0x00) && (in[1] == 0x00) &&
850 (in[2] == 0x3C) && (in[3] == 0x00))
851 return(XML_CHAR_ENCODING_UCS4_2143);
852 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
853 (in[2] == 0x00) && (in[3] == 0x00))
854 return(XML_CHAR_ENCODING_UCS4_3412);
855 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
856 (in[2] == 0xA7) && (in[3] == 0x94))
857 return(XML_CHAR_ENCODING_EBCDIC);
858 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
859 (in[2] == 0x78) && (in[3] == 0x6D))
860 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000861 /*
862 * Although not part of the recommendation, we also
863 * attempt an "auto-recognition" of UTF-16LE and
864 * UTF-16BE encodings.
865 */
866 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
867 (in[2] == 0x3F) && (in[3] == 0x00))
868 return(XML_CHAR_ENCODING_UTF16LE);
869 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
870 (in[2] == 0x00) && (in[3] == 0x3F))
871 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000872 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000873 if (len >= 3) {
874 /*
875 * Errata on XML-1.0 June 20 2001
876 * We now allow an UTF8 encoded BOM
877 */
878 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
879 (in[2] == 0xBF))
880 return(XML_CHAR_ENCODING_UTF8);
881 }
William M. Brackf9415e42003-11-28 09:39:10 +0000882 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000883 if (len >= 2) {
884 if ((in[0] == 0xFE) && (in[1] == 0xFF))
885 return(XML_CHAR_ENCODING_UTF16BE);
886 if ((in[0] == 0xFF) && (in[1] == 0xFE))
887 return(XML_CHAR_ENCODING_UTF16LE);
888 }
889 return(XML_CHAR_ENCODING_NONE);
890}
891
892/**
893 * xmlCleanupEncodingAliases:
894 *
895 * Unregisters all aliases
896 */
897void
898xmlCleanupEncodingAliases(void) {
899 int i;
900
901 if (xmlCharEncodingAliases == NULL)
902 return;
903
904 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
905 if (xmlCharEncodingAliases[i].name != NULL)
906 xmlFree((char *) xmlCharEncodingAliases[i].name);
907 if (xmlCharEncodingAliases[i].alias != NULL)
908 xmlFree((char *) xmlCharEncodingAliases[i].alias);
909 }
910 xmlCharEncodingAliasesNb = 0;
911 xmlCharEncodingAliasesMax = 0;
912 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000913 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000914}
915
916/**
917 * xmlGetEncodingAlias:
918 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
919 *
920 * Lookup an encoding name for the given alias.
921 *
William M. Brackf9415e42003-11-28 09:39:10 +0000922 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000923 */
924const char *
925xmlGetEncodingAlias(const char *alias) {
926 int i;
927 char upper[100];
928
929 if (alias == NULL)
930 return(NULL);
931
932 if (xmlCharEncodingAliases == NULL)
933 return(NULL);
934
935 for (i = 0;i < 99;i++) {
936 upper[i] = toupper(alias[i]);
937 if (upper[i] == 0) break;
938 }
939 upper[i] = 0;
940
941 /*
942 * Walk down the list looking for a definition of the alias
943 */
944 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
945 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
946 return(xmlCharEncodingAliases[i].name);
947 }
948 }
949 return(NULL);
950}
951
952/**
953 * xmlAddEncodingAlias:
954 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
955 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
956 *
William M. Brackf9415e42003-11-28 09:39:10 +0000957 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000958 * will be overwritten.
959 *
960 * Returns 0 in case of success, -1 in case of error
961 */
962int
963xmlAddEncodingAlias(const char *name, const char *alias) {
964 int i;
965 char upper[100];
966
967 if ((name == NULL) || (alias == NULL))
968 return(-1);
969
970 for (i = 0;i < 99;i++) {
971 upper[i] = toupper(alias[i]);
972 if (upper[i] == 0) break;
973 }
974 upper[i] = 0;
975
976 if (xmlCharEncodingAliases == NULL) {
977 xmlCharEncodingAliasesNb = 0;
978 xmlCharEncodingAliasesMax = 20;
979 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
980 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
981 if (xmlCharEncodingAliases == NULL)
982 return(-1);
983 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
984 xmlCharEncodingAliasesMax *= 2;
985 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
986 xmlRealloc(xmlCharEncodingAliases,
987 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
988 }
989 /*
990 * Walk down the list looking for a definition of the alias
991 */
992 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
993 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
994 /*
995 * Replace the definition.
996 */
997 xmlFree((char *) xmlCharEncodingAliases[i].name);
998 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
999 return(0);
1000 }
1001 }
1002 /*
1003 * Add the definition
1004 */
1005 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1006 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1007 xmlCharEncodingAliasesNb++;
1008 return(0);
1009}
1010
1011/**
1012 * xmlDelEncodingAlias:
1013 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1014 *
1015 * Unregisters an encoding alias @alias
1016 *
1017 * Returns 0 in case of success, -1 in case of error
1018 */
1019int
1020xmlDelEncodingAlias(const char *alias) {
1021 int i;
1022
1023 if (alias == NULL)
1024 return(-1);
1025
1026 if (xmlCharEncodingAliases == NULL)
1027 return(-1);
1028 /*
1029 * Walk down the list looking for a definition of the alias
1030 */
1031 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1032 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1033 xmlFree((char *) xmlCharEncodingAliases[i].name);
1034 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1035 xmlCharEncodingAliasesNb--;
1036 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1037 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1038 return(0);
1039 }
1040 }
1041 return(-1);
1042}
1043
1044/**
1045 * xmlParseCharEncoding:
1046 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1047 *
William M. Brackf9415e42003-11-28 09:39:10 +00001048 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001049 * that the comparison is case insensitive accordingly to the section
1050 * [XML] 4.3.3 Character Encoding in Entities.
1051 *
1052 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1053 * if not recognized.
1054 */
1055xmlCharEncoding
1056xmlParseCharEncoding(const char* name)
1057{
1058 const char *alias;
1059 char upper[500];
1060 int i;
1061
1062 if (name == NULL)
1063 return(XML_CHAR_ENCODING_NONE);
1064
1065 /*
1066 * Do the alias resolution
1067 */
1068 alias = xmlGetEncodingAlias(name);
1069 if (alias != NULL)
1070 name = alias;
1071
1072 for (i = 0;i < 499;i++) {
1073 upper[i] = toupper(name[i]);
1074 if (upper[i] == 0) break;
1075 }
1076 upper[i] = 0;
1077
1078 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1079 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1080 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1081
1082 /*
1083 * NOTE: if we were able to parse this, the endianness of UTF16 is
1084 * already found and in use
1085 */
1086 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1087 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1088
1089 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1090 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1091 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1092
1093 /*
1094 * NOTE: if we were able to parse this, the endianness of UCS4 is
1095 * already found and in use
1096 */
1097 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1098 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1099 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1100
1101
1102 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1103 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1104 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1105
1106 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1107 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1108 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1109
1110 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1111 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1112 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1113 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1114 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1115 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1116 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1117
1118 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1119 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1120 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1121
1122#ifdef DEBUG_ENCODING
1123 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1124#endif
1125 return(XML_CHAR_ENCODING_ERROR);
1126}
1127
1128/**
1129 * xmlGetCharEncodingName:
1130 * @enc: the encoding
1131 *
1132 * The "canonical" name for XML encoding.
1133 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1134 * Section 4.3.3 Character Encoding in Entities
1135 *
1136 * Returns the canonical name for the given encoding
1137 */
1138
1139const char*
1140xmlGetCharEncodingName(xmlCharEncoding enc) {
1141 switch (enc) {
1142 case XML_CHAR_ENCODING_ERROR:
1143 return(NULL);
1144 case XML_CHAR_ENCODING_NONE:
1145 return(NULL);
1146 case XML_CHAR_ENCODING_UTF8:
1147 return("UTF-8");
1148 case XML_CHAR_ENCODING_UTF16LE:
1149 return("UTF-16");
1150 case XML_CHAR_ENCODING_UTF16BE:
1151 return("UTF-16");
1152 case XML_CHAR_ENCODING_EBCDIC:
1153 return("EBCDIC");
1154 case XML_CHAR_ENCODING_UCS4LE:
1155 return("ISO-10646-UCS-4");
1156 case XML_CHAR_ENCODING_UCS4BE:
1157 return("ISO-10646-UCS-4");
1158 case XML_CHAR_ENCODING_UCS4_2143:
1159 return("ISO-10646-UCS-4");
1160 case XML_CHAR_ENCODING_UCS4_3412:
1161 return("ISO-10646-UCS-4");
1162 case XML_CHAR_ENCODING_UCS2:
1163 return("ISO-10646-UCS-2");
1164 case XML_CHAR_ENCODING_8859_1:
1165 return("ISO-8859-1");
1166 case XML_CHAR_ENCODING_8859_2:
1167 return("ISO-8859-2");
1168 case XML_CHAR_ENCODING_8859_3:
1169 return("ISO-8859-3");
1170 case XML_CHAR_ENCODING_8859_4:
1171 return("ISO-8859-4");
1172 case XML_CHAR_ENCODING_8859_5:
1173 return("ISO-8859-5");
1174 case XML_CHAR_ENCODING_8859_6:
1175 return("ISO-8859-6");
1176 case XML_CHAR_ENCODING_8859_7:
1177 return("ISO-8859-7");
1178 case XML_CHAR_ENCODING_8859_8:
1179 return("ISO-8859-8");
1180 case XML_CHAR_ENCODING_8859_9:
1181 return("ISO-8859-9");
1182 case XML_CHAR_ENCODING_2022_JP:
1183 return("ISO-2022-JP");
1184 case XML_CHAR_ENCODING_SHIFT_JIS:
1185 return("Shift-JIS");
1186 case XML_CHAR_ENCODING_EUC_JP:
1187 return("EUC-JP");
1188 case XML_CHAR_ENCODING_ASCII:
1189 return(NULL);
1190 }
1191 return(NULL);
1192}
1193
Daniel Veillard97ac1312001-05-30 19:14:17 +00001194/************************************************************************
1195 * *
1196 * Char encoding handlers *
1197 * *
1198 ************************************************************************/
1199
Owen Taylor3473f882001-02-23 17:55:21 +00001200
/* the size should be growable, but it's not a big deal ... */
/* Number of slots allocated for the handlers table below. */
#define MAX_ENCODING_HANDLERS 50
/* Table of registered handlers, allocated by xmlInitCharEncodingHandlers(). */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of entries of the handlers table currently in use. */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1212
1213/**
1214 * xmlNewCharEncodingHandler:
1215 * @name: the encoding name, in UTF-8 format (ASCII actually)
1216 * @input: the xmlCharEncodingInputFunc to read that encoding
1217 * @output: the xmlCharEncodingOutputFunc to write that encoding
1218 *
1219 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001220 *
Owen Taylor3473f882001-02-23 17:55:21 +00001221 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1222 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001223xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001224xmlNewCharEncodingHandler(const char *name,
1225 xmlCharEncodingInputFunc input,
1226 xmlCharEncodingOutputFunc output) {
1227 xmlCharEncodingHandlerPtr handler;
1228 const char *alias;
1229 char upper[500];
1230 int i;
1231 char *up = 0;
1232
1233 /*
1234 * Do the alias resolution
1235 */
1236 alias = xmlGetEncodingAlias(name);
1237 if (alias != NULL)
1238 name = alias;
1239
1240 /*
1241 * Keep only the uppercase version of the encoding.
1242 */
1243 if (name == NULL) {
1244 xmlGenericError(xmlGenericErrorContext,
1245 "xmlNewCharEncodingHandler : no name !\n");
1246 return(NULL);
1247 }
1248 for (i = 0;i < 499;i++) {
1249 upper[i] = toupper(name[i]);
1250 if (upper[i] == 0) break;
1251 }
1252 upper[i] = 0;
1253 up = xmlMemStrdup(upper);
1254 if (up == NULL) {
1255 xmlGenericError(xmlGenericErrorContext,
1256 "xmlNewCharEncodingHandler : out of memory !\n");
1257 return(NULL);
1258 }
1259
1260 /*
1261 * allocate and fill-up an handler block.
1262 */
1263 handler = (xmlCharEncodingHandlerPtr)
1264 xmlMalloc(sizeof(xmlCharEncodingHandler));
1265 if (handler == NULL) {
1266 xmlGenericError(xmlGenericErrorContext,
1267 "xmlNewCharEncodingHandler : out of memory !\n");
1268 return(NULL);
1269 }
1270 handler->input = input;
1271 handler->output = output;
1272 handler->name = up;
1273
1274#ifdef LIBXML_ICONV_ENABLED
1275 handler->iconv_in = NULL;
1276 handler->iconv_out = NULL;
1277#endif /* LIBXML_ICONV_ENABLED */
1278
1279 /*
1280 * registers and returns the handler.
1281 */
1282 xmlRegisterCharEncodingHandler(handler);
1283#ifdef DEBUG_ENCODING
1284 xmlGenericError(xmlGenericErrorContext,
1285 "Registered encoding handler for %s\n", name);
1286#endif
1287 return(handler);
1288}
1289
1290/**
1291 * xmlInitCharEncodingHandlers:
1292 *
1293 * Initialize the char encoding support, it registers the default
1294 * encoding supported.
1295 * NOTE: while public, this function usually doesn't need to be called
1296 * in normal processing.
1297 */
1298void
1299xmlInitCharEncodingHandlers(void) {
1300 unsigned short int tst = 0x1234;
1301 unsigned char *ptr = (unsigned char *) &tst;
1302
1303 if (handlers != NULL) return;
1304
1305 handlers = (xmlCharEncodingHandlerPtr *)
1306 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1307
1308 if (*ptr == 0x12) xmlLittleEndian = 0;
1309 else if (*ptr == 0x34) xmlLittleEndian = 1;
1310 else xmlGenericError(xmlGenericErrorContext,
1311 "Odd problem at endianness detection\n");
1312
1313 if (handlers == NULL) {
1314 xmlGenericError(xmlGenericErrorContext,
1315 "xmlInitCharEncodingHandlers : out of memory !\n");
1316 return;
1317 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001318 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001319#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001320 xmlUTF16LEHandler =
1321 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1322 xmlUTF16BEHandler =
1323 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001324 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001325 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1326 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001327 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001328#ifdef LIBXML_HTML_ENABLED
1329 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1330#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001331#else
1332 xmlUTF16LEHandler =
1333 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1334 xmlUTF16BEHandler =
1335 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001336 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001337 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1338 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1339 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1340#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001341#ifndef LIBXML_ICONV_ENABLED
1342#ifdef LIBXML_ISO8859X_ENABLED
1343 xmlRegisterCharEncodingHandlersISO8859x ();
1344#endif
1345#endif
1346
Owen Taylor3473f882001-02-23 17:55:21 +00001347}
1348
1349/**
1350 * xmlCleanupCharEncodingHandlers:
1351 *
1352 * Cleanup the memory allocated for the char encoding support, it
1353 * unregisters all the encoding handlers and the aliases.
1354 */
1355void
1356xmlCleanupCharEncodingHandlers(void) {
1357 xmlCleanupEncodingAliases();
1358
1359 if (handlers == NULL) return;
1360
1361 for (;nbCharEncodingHandler > 0;) {
1362 nbCharEncodingHandler--;
1363 if (handlers[nbCharEncodingHandler] != NULL) {
1364 if (handlers[nbCharEncodingHandler]->name != NULL)
1365 xmlFree(handlers[nbCharEncodingHandler]->name);
1366 xmlFree(handlers[nbCharEncodingHandler]);
1367 }
1368 }
1369 xmlFree(handlers);
1370 handlers = NULL;
1371 nbCharEncodingHandler = 0;
1372 xmlDefaultCharEncodingHandler = NULL;
1373}
1374
1375/**
1376 * xmlRegisterCharEncodingHandler:
1377 * @handler: the xmlCharEncodingHandlerPtr handler block
1378 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001379 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001380 */
1381void
1382xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1383 if (handlers == NULL) xmlInitCharEncodingHandlers();
1384 if (handler == NULL) {
1385 xmlGenericError(xmlGenericErrorContext,
1386 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1387 return;
1388 }
1389
1390 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1391 xmlGenericError(xmlGenericErrorContext,
1392 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1393 xmlGenericError(xmlGenericErrorContext,
1394 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1395 return;
1396 }
1397 handlers[nbCharEncodingHandler++] = handler;
1398}
1399
1400/**
1401 * xmlGetCharEncodingHandler:
1402 * @enc: an xmlCharEncoding value.
1403 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001404 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001405 *
1406 * Returns the handler or NULL if not found
1407 */
1408xmlCharEncodingHandlerPtr
1409xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1410 xmlCharEncodingHandlerPtr handler;
1411
1412 if (handlers == NULL) xmlInitCharEncodingHandlers();
1413 switch (enc) {
1414 case XML_CHAR_ENCODING_ERROR:
1415 return(NULL);
1416 case XML_CHAR_ENCODING_NONE:
1417 return(NULL);
1418 case XML_CHAR_ENCODING_UTF8:
1419 return(NULL);
1420 case XML_CHAR_ENCODING_UTF16LE:
1421 return(xmlUTF16LEHandler);
1422 case XML_CHAR_ENCODING_UTF16BE:
1423 return(xmlUTF16BEHandler);
1424 case XML_CHAR_ENCODING_EBCDIC:
1425 handler = xmlFindCharEncodingHandler("EBCDIC");
1426 if (handler != NULL) return(handler);
1427 handler = xmlFindCharEncodingHandler("ebcdic");
1428 if (handler != NULL) return(handler);
1429 break;
1430 case XML_CHAR_ENCODING_UCS4BE:
1431 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1432 if (handler != NULL) return(handler);
1433 handler = xmlFindCharEncodingHandler("UCS-4");
1434 if (handler != NULL) return(handler);
1435 handler = xmlFindCharEncodingHandler("UCS4");
1436 if (handler != NULL) return(handler);
1437 break;
1438 case XML_CHAR_ENCODING_UCS4LE:
1439 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1440 if (handler != NULL) return(handler);
1441 handler = xmlFindCharEncodingHandler("UCS-4");
1442 if (handler != NULL) return(handler);
1443 handler = xmlFindCharEncodingHandler("UCS4");
1444 if (handler != NULL) return(handler);
1445 break;
1446 case XML_CHAR_ENCODING_UCS4_2143:
1447 break;
1448 case XML_CHAR_ENCODING_UCS4_3412:
1449 break;
1450 case XML_CHAR_ENCODING_UCS2:
1451 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1452 if (handler != NULL) return(handler);
1453 handler = xmlFindCharEncodingHandler("UCS-2");
1454 if (handler != NULL) return(handler);
1455 handler = xmlFindCharEncodingHandler("UCS2");
1456 if (handler != NULL) return(handler);
1457 break;
1458
1459 /*
1460 * We used to keep ISO Latin encodings native in the
1461 * generated data. This led to so many problems that
1462 * this has been removed. One can still change this
1463 * back by registering no-ops encoders for those
1464 */
1465 case XML_CHAR_ENCODING_8859_1:
1466 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1467 if (handler != NULL) return(handler);
1468 break;
1469 case XML_CHAR_ENCODING_8859_2:
1470 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1471 if (handler != NULL) return(handler);
1472 break;
1473 case XML_CHAR_ENCODING_8859_3:
1474 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1475 if (handler != NULL) return(handler);
1476 break;
1477 case XML_CHAR_ENCODING_8859_4:
1478 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1479 if (handler != NULL) return(handler);
1480 break;
1481 case XML_CHAR_ENCODING_8859_5:
1482 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1483 if (handler != NULL) return(handler);
1484 break;
1485 case XML_CHAR_ENCODING_8859_6:
1486 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1487 if (handler != NULL) return(handler);
1488 break;
1489 case XML_CHAR_ENCODING_8859_7:
1490 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1491 if (handler != NULL) return(handler);
1492 break;
1493 case XML_CHAR_ENCODING_8859_8:
1494 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1495 if (handler != NULL) return(handler);
1496 break;
1497 case XML_CHAR_ENCODING_8859_9:
1498 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1499 if (handler != NULL) return(handler);
1500 break;
1501
1502
1503 case XML_CHAR_ENCODING_2022_JP:
1504 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1505 if (handler != NULL) return(handler);
1506 break;
1507 case XML_CHAR_ENCODING_SHIFT_JIS:
1508 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1509 if (handler != NULL) return(handler);
1510 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1511 if (handler != NULL) return(handler);
1512 handler = xmlFindCharEncodingHandler("Shift_JIS");
1513 if (handler != NULL) return(handler);
1514 break;
1515 case XML_CHAR_ENCODING_EUC_JP:
1516 handler = xmlFindCharEncodingHandler("EUC-JP");
1517 if (handler != NULL) return(handler);
1518 break;
1519 default:
1520 break;
1521 }
1522
1523#ifdef DEBUG_ENCODING
1524 xmlGenericError(xmlGenericErrorContext,
1525 "No handler found for encoding %d\n", enc);
1526#endif
1527 return(NULL);
1528}
1529
1530/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 * xmlFindCharEncodingHandler:
1532 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001533 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001534 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001535 *
1536 * Returns the handler or NULL if not found
1537 */
1538xmlCharEncodingHandlerPtr
1539xmlFindCharEncodingHandler(const char *name) {
1540 const char *nalias;
1541 const char *norig;
1542 xmlCharEncoding alias;
1543#ifdef LIBXML_ICONV_ENABLED
1544 xmlCharEncodingHandlerPtr enc;
1545 iconv_t icv_in, icv_out;
1546#endif /* LIBXML_ICONV_ENABLED */
1547 char upper[100];
1548 int i;
1549
1550 if (handlers == NULL) xmlInitCharEncodingHandlers();
1551 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1552 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1553
1554 /*
1555 * Do the alias resolution
1556 */
1557 norig = name;
1558 nalias = xmlGetEncodingAlias(name);
1559 if (nalias != NULL)
1560 name = nalias;
1561
1562 /*
1563 * Check first for directly registered encoding names
1564 */
1565 for (i = 0;i < 99;i++) {
1566 upper[i] = toupper(name[i]);
1567 if (upper[i] == 0) break;
1568 }
1569 upper[i] = 0;
1570
1571 for (i = 0;i < nbCharEncodingHandler; i++)
1572 if (!strcmp(upper, handlers[i]->name)) {
1573#ifdef DEBUG_ENCODING
1574 xmlGenericError(xmlGenericErrorContext,
1575 "Found registered handler for encoding %s\n", name);
1576#endif
1577 return(handlers[i]);
1578 }
1579
1580#ifdef LIBXML_ICONV_ENABLED
1581 /* check whether iconv can handle this */
1582 icv_in = iconv_open("UTF-8", name);
1583 icv_out = iconv_open(name, "UTF-8");
1584 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1585 enc = (xmlCharEncodingHandlerPtr)
1586 xmlMalloc(sizeof(xmlCharEncodingHandler));
1587 if (enc == NULL) {
1588 iconv_close(icv_in);
1589 iconv_close(icv_out);
1590 return(NULL);
1591 }
1592 enc->name = xmlMemStrdup(name);
1593 enc->input = NULL;
1594 enc->output = NULL;
1595 enc->iconv_in = icv_in;
1596 enc->iconv_out = icv_out;
1597#ifdef DEBUG_ENCODING
1598 xmlGenericError(xmlGenericErrorContext,
1599 "Found iconv handler for encoding %s\n", name);
1600#endif
1601 return enc;
1602 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1603 xmlGenericError(xmlGenericErrorContext,
1604 "iconv : problems with filters for '%s'\n", name);
1605 }
1606#endif /* LIBXML_ICONV_ENABLED */
1607
1608#ifdef DEBUG_ENCODING
1609 xmlGenericError(xmlGenericErrorContext,
1610 "No handler found for encoding %s\n", name);
1611#endif
1612
1613 /*
1614 * Fallback using the canonical names
1615 */
1616 alias = xmlParseCharEncoding(norig);
1617 if (alias != XML_CHAR_ENCODING_ERROR) {
1618 const char* canon;
1619 canon = xmlGetCharEncodingName(alias);
1620 if ((canon != NULL) && (strcmp(name, canon))) {
1621 return(xmlFindCharEncodingHandler(canon));
1622 }
1623 }
1624
William M. Brackf9415e42003-11-28 09:39:10 +00001625 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001626 return(NULL);
1627}
1628
Daniel Veillard97ac1312001-05-30 19:14:17 +00001629/************************************************************************
1630 * *
1631 * ICONV based generic conversion functions *
1632 * *
1633 ************************************************************************/
1634
Owen Taylor3473f882001-02-23 17:55:21 +00001635#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion and maps its outcome to the return
 * codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (errno is E2BIG), or
 *     -2 if the transcoding fails (errno is EILSEQ: the input is not
 *        valid or cannot be represented in the target encoding), or
 *     -3 for any other failure, including input ending in the middle
 *        of a sequence (errno is EINVAL).
 *
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 * Both are set to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft = *inlen;
    size_t dstLeft = *outlen;
    int status;

    status = iconv(cd, (char **) &src, &srcLeft, &dst, &dstLeft);

    /* turn the "bytes left" counters of iconv into "bytes used" ones */
    if (in == NULL) {
        *inlen = 0;
        *outlen = 0;
    } else {
        *inlen -= srcLeft;
        *outlen -= dstLeft;
    }

    if ((srcLeft == 0) && (status != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and anything unexpected */
    return -3;
}
1694#endif /* LIBXML_ICONV_ENABLED */
1695
Daniel Veillard97ac1312001-05-30 19:14:17 +00001696/************************************************************************
1697 * *
1698 * The real API used by libxml for on-the-fly conversion *
1699 * *
1700 ************************************************************************/
1701
Owen Taylor3473f882001-02-23 17:55:21 +00001702/**
1703 * xmlCharEncFirstLine:
1704 * @handler: char enconding transformation data structure
1705 * @out: an xmlBuffer for the output.
1706 * @in: an xmlBuffer for the input
1707 *
1708 * Front-end for the encoding handler input function, but handle only
1709 * the very first line, i.e. limit itself to 45 chars.
1710 *
1711 * Returns the number of byte written if success, or
1712 * -1 general error
1713 * -2 if the transcoding fails (for *in is not valid utf8 string or
1714 * the result of transformation can't fit into the encoding we want), or
1715 */
1716int
1717xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1718 xmlBufferPtr in) {
1719 int ret = -2;
1720 int written;
1721 int toconv;
1722
1723 if (handler == NULL) return(-1);
1724 if (out == NULL) return(-1);
1725 if (in == NULL) return(-1);
1726
1727 written = out->size - out->use;
1728 toconv = in->use;
1729 if (toconv * 2 >= written) {
1730 xmlBufferGrow(out, toconv);
1731 written = out->size - out->use - 1;
1732 }
1733
1734 /*
1735 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1736 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001737 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001738 */
1739 written = 45;
1740
1741 if (handler->input != NULL) {
1742 ret = handler->input(&out->content[out->use], &written,
1743 in->content, &toconv);
1744 xmlBufferShrink(in, toconv);
1745 out->use += written;
1746 out->content[out->use] = 0;
1747 }
1748#ifdef LIBXML_ICONV_ENABLED
1749 else if (handler->iconv_in != NULL) {
1750 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1751 &written, in->content, &toconv);
1752 xmlBufferShrink(in, toconv);
1753 out->use += written;
1754 out->content[out->use] = 0;
1755 if (ret == -1) ret = -3;
1756 }
1757#endif /* LIBXML_ICONV_ENABLED */
1758#ifdef DEBUG_ENCODING
1759 switch (ret) {
1760 case 0:
1761 xmlGenericError(xmlGenericErrorContext,
1762 "converted %d bytes to %d bytes of input\n",
1763 toconv, written);
1764 break;
1765 case -1:
1766 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1767 toconv, written, in->use);
1768 break;
1769 case -2:
1770 xmlGenericError(xmlGenericErrorContext,
1771 "input conversion failed due to input error\n");
1772 break;
1773 case -3:
1774 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1775 toconv, written, in->use);
1776 break;
1777 default:
1778 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1779 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001780#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001781 /*
1782 * Ignore when input buffer is not on a boundary
1783 */
1784 if (ret == -3) ret = 0;
1785 if (ret == -1) ret = 0;
1786 return(ret);
1787}
1788
1789/**
1790 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001791 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001792 * @out: an xmlBuffer for the output.
1793 * @in: an xmlBuffer for the input
1794 *
1795 * Generic front-end for the encoding handler input function
1796 *
1797 * Returns the number of byte written if success, or
1798 * -1 general error
1799 * -2 if the transcoding fails (for *in is not valid utf8 string or
1800 * the result of transformation can't fit into the encoding we want), or
1801 */
1802int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001803xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1804 xmlBufferPtr in)
1805{
Owen Taylor3473f882001-02-23 17:55:21 +00001806 int ret = -2;
1807 int written;
1808 int toconv;
1809
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001810 if (handler == NULL)
1811 return (-1);
1812 if (out == NULL)
1813 return (-1);
1814 if (in == NULL)
1815 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001816
1817 toconv = in->use;
1818 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001819 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001820 written = out->size - out->use;
1821 if (toconv * 2 >= written) {
1822 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001823 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001824 }
1825 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001826 ret = handler->input(&out->content[out->use], &written,
1827 in->content, &toconv);
1828 xmlBufferShrink(in, toconv);
1829 out->use += written;
1830 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001831 }
1832#ifdef LIBXML_ICONV_ENABLED
1833 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001834 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1835 &written, in->content, &toconv);
1836 xmlBufferShrink(in, toconv);
1837 out->use += written;
1838 out->content[out->use] = 0;
1839 if (ret == -1)
1840 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001841 }
1842#endif /* LIBXML_ICONV_ENABLED */
1843 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001844 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001845#ifdef DEBUG_ENCODING
1846 xmlGenericError(xmlGenericErrorContext,
1847 "converted %d bytes to %d bytes of input\n",
1848 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001849#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001850 break;
1851 case -1:
1852#ifdef DEBUG_ENCODING
1853 xmlGenericError(xmlGenericErrorContext,
1854 "converted %d bytes to %d bytes of input, %d left\n",
1855 toconv, written, in->use);
1856#endif
1857 break;
1858 case -3:
1859#ifdef DEBUG_ENCODING
1860 xmlGenericError(xmlGenericErrorContext,
1861 "converted %d bytes to %d bytes of input, %d left\n",
1862 toconv, written, in->use);
1863#endif
1864 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001865 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001866 xmlGenericError(xmlGenericErrorContext,
1867 "input conversion failed due to input error\n");
1868 xmlGenericError(xmlGenericErrorContext,
1869 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1870 in->content[0], in->content[1],
1871 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001872 }
1873 /*
1874 * Ignore when input buffer is not on a boundary
1875 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001876 if (ret == -3)
1877 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001878 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001879}
1880
1881/**
1882 * xmlCharEncOutFunc:
1883 * @handler: char enconding transformation data structure
1884 * @out: an xmlBuffer for the output.
1885 * @in: an xmlBuffer for the input
1886 *
1887 * Generic front-end for the encoding handler output function
1888 * a first call with @in == NULL has to be made firs to initiate the
1889 * output in case of non-stateless encoding needing to initiate their
1890 * state or the output (like the BOM in UTF16).
1891 * In case of UTF8 sequence conversion errors for the given encoder,
1892 * the content will be automatically remapped to a CharRef sequence.
1893 *
1894 * Returns the number of byte written if success, or
1895 * -1 general error
1896 * -2 if the transcoding fails (for *in is not valid utf8 string or
1897 * the result of transformation can't fit into the encoding we want), or
1898 */
1899int
1900xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1901 xmlBufferPtr in) {
1902 int ret = -2;
1903 int written;
1904 int writtentot = 0;
1905 int toconv;
1906 int output = 0;
1907
1908 if (handler == NULL) return(-1);
1909 if (out == NULL) return(-1);
1910
1911retry:
1912
1913 written = out->size - out->use;
1914
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001915 if (written > 0)
1916 written--; /* Gennady: count '/0' */
1917
Owen Taylor3473f882001-02-23 17:55:21 +00001918 /*
1919 * First specific handling of in = NULL, i.e. the initialization call
1920 */
1921 if (in == NULL) {
1922 toconv = 0;
1923 if (handler->output != NULL) {
1924 ret = handler->output(&out->content[out->use], &written,
1925 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001926 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001927 out->use += written;
1928 out->content[out->use] = 0;
1929 }
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931#ifdef LIBXML_ICONV_ENABLED
1932 else if (handler->iconv_out != NULL) {
1933 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1934 &written, NULL, &toconv);
1935 out->use += written;
1936 out->content[out->use] = 0;
1937 }
1938#endif /* LIBXML_ICONV_ENABLED */
1939#ifdef DEBUG_ENCODING
1940 xmlGenericError(xmlGenericErrorContext,
1941 "initialized encoder\n");
1942#endif
1943 return(0);
1944 }
1945
1946 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001947 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001948 */
1949 toconv = in->use;
1950 if (toconv == 0)
1951 return(0);
1952 if (toconv * 2 >= written) {
1953 xmlBufferGrow(out, toconv * 2);
1954 written = out->size - out->use - 1;
1955 }
1956 if (handler->output != NULL) {
1957 ret = handler->output(&out->content[out->use], &written,
1958 in->content, &toconv);
1959 xmlBufferShrink(in, toconv);
1960 out->use += written;
1961 writtentot += written;
1962 out->content[out->use] = 0;
1963 }
1964#ifdef LIBXML_ICONV_ENABLED
1965 else if (handler->iconv_out != NULL) {
1966 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1967 &written, in->content, &toconv);
1968 xmlBufferShrink(in, toconv);
1969 out->use += written;
1970 writtentot += written;
1971 out->content[out->use] = 0;
1972 if (ret == -1) {
1973 if (written > 0) {
1974 /*
1975 * Can be a limitation of iconv
1976 */
1977 goto retry;
1978 }
1979 ret = -3;
1980 }
1981 }
1982#endif /* LIBXML_ICONV_ENABLED */
1983 else {
1984 xmlGenericError(xmlGenericErrorContext,
1985 "xmlCharEncOutFunc: no output function !\n");
1986 return(-1);
1987 }
1988
1989 if (ret >= 0) output += ret;
1990
1991 /*
1992 * Attempt to handle error cases
1993 */
1994 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001995 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001996#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00001997 xmlGenericError(xmlGenericErrorContext,
1998 "converted %d bytes to %d bytes of output\n",
1999 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002000#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002001 break;
2002 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002003#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002004 xmlGenericError(xmlGenericErrorContext,
2005 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002006#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002007 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002008 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002009#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002010 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2011 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002012#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002013 break;
2014 case -2: {
2015 int len = in->use;
2016 const xmlChar *utf = (const xmlChar *) in->content;
2017 int cur;
2018
2019 cur = xmlGetUTF8Char(utf, &len);
2020 if (cur > 0) {
2021 xmlChar charref[20];
2022
2023#ifdef DEBUG_ENCODING
2024 xmlGenericError(xmlGenericErrorContext,
2025 "handling output conversion error\n");
2026 xmlGenericError(xmlGenericErrorContext,
2027 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2028 in->content[0], in->content[1],
2029 in->content[2], in->content[3]);
2030#endif
2031 /*
2032 * Removes the UTF8 sequence, and replace it by a charref
2033 * and continue the transcoding phase, hoping the error
2034 * did not mangle the encoder state.
2035 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002036 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002037 xmlBufferShrink(in, len);
2038 xmlBufferAddHead(in, charref, -1);
2039
2040 goto retry;
2041 } else {
2042 xmlGenericError(xmlGenericErrorContext,
2043 "output conversion failed due to conv error\n");
2044 xmlGenericError(xmlGenericErrorContext,
2045 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2046 in->content[0], in->content[1],
2047 in->content[2], in->content[3]);
2048 in->content[0] = ' ';
2049 }
2050 break;
2051 }
2052 }
2053 return(ret);
2054}
2055
2056/**
2057 * xmlCharEncCloseFunc:
2058 * @handler: char enconding transformation data structure
2059 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002060 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002061 *
2062 * Returns 0 if success, or -1 in case of error
2063 */
2064int
2065xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2066 int ret = 0;
2067 if (handler == NULL) return(-1);
2068 if (handler->name == NULL) return(-1);
2069#ifdef LIBXML_ICONV_ENABLED
2070 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002071 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002072 * and the associated icon resources.
2073 */
2074 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2075 if (handler->name != NULL)
2076 xmlFree(handler->name);
2077 handler->name = NULL;
2078 if (handler->iconv_out != NULL) {
2079 if (iconv_close(handler->iconv_out))
2080 ret = -1;
2081 handler->iconv_out = NULL;
2082 }
2083 if (handler->iconv_in != NULL) {
2084 if (iconv_close(handler->iconv_in))
2085 ret = -1;
2086 handler->iconv_in = NULL;
2087 }
2088 xmlFree(handler);
2089 }
2090#endif /* LIBXML_ICONV_ENABLED */
2091#ifdef DEBUG_ENCODING
2092 if (ret)
2093 xmlGenericError(xmlGenericErrorContext,
2094 "failed to close the encoding handler\n");
2095 else
2096 xmlGenericError(xmlGenericErrorContext,
2097 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002098#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002099
Owen Taylor3473f882001-02-23 17:55:21 +00002100 return(ret);
2101}
2102
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002103#ifndef LIBXML_ICONV_ENABLED
2104#ifdef LIBXML_ISO8859X_ENABLED
2105
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * @xlattable starts with 48 first-level bytes followed by blocks of 64
 * bytes: entries 0..31 are selected by the low 5 bits of a 2-byte lead,
 * entries 32..47 by the low 4 bits of a 3-byte lead, and each lookup
 * yields the number of the 64-byte block indexed by the next byte.
 * A final value of 0 means the character is not in the character set.
 *
 * A call with @in == NULL is the initialization call: it only sets both
 * lengths to 0.
 *
 * Returns 0 if success (conversion also stops, with 0, when @out is
 *     full), or -2 if the transcoding fails: misplaced or missing
 *     trailing byte, character outside of the target set, or character
 *     >= U+010000.
 * The value of @inlen after return is the number of octets consumed;
 *     on error it is the offset of the first byte of the sequence which
 *     could not be converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    /* end of the last sequence which was fully converted */
    const unsigned char* processed = in;
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* every character yields exactly one byte, so one free slot is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -2;
                break;
            }
            c = *in++;
            /* a continuation byte is 10xxxxxx */
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -2;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                                                xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(ret);
}
2218
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the code point of bytes 0x80..0xFF,
 *                0 for an undefined code point
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, the others
 * are looked up in @unicodetable and written as 2 or 3 UTF-8 bytes.
 * Conversion stops, without error, as soon as the next character does
 * not fit in @out. A NULL @in only sets both lengths to 0.
 *
 * Returns 0 if success, or -1 if an undefined code point is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if (in == NULL) {
        /* nothing to convert, same convention as UTF8ToISO8859x() */
        *outlen = 0;
        *inlen = 0;
        return (0);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        /* read only inside [in, inend): never before the bounds check */
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* a table character needs at least two bytes */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* three bytes are written here, make sure they fit */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (0);
}
2278
2279
2280/************************************************************************
2281 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2282 ************************************************************************/
2283
/*
 * ISO-8859-2 to Unicode: 128 code points, one per byte value above 0x7F.
 * Shaped for the unicodetable argument of ISO8859xToUTF8(), which reads
 * entry (byte - 0x80) and treats 0x0000 as an undefined code point.
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2302
/*
 * Unicode to ISO-8859-2, in the 2-level layout indexed by UTF8ToISO8859x():
 * 48 first-level bytes (0..31 for 2-byte leads, 32..47 for 3-byte leads)
 * holding block numbers, followed by 6 blocks of 64 bytes. A final value
 * of 0 is reported by UTF8ToISO8859x() as "not in character set".
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2332
/*
 * ISO-8859-3 to Unicode: 128 code points, one per byte value above 0x7F.
 * Shaped for the unicodetable argument of ISO8859xToUTF8(), which reads
 * entry (byte - 0x80) and treats 0x0000 as an undefined code point.
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2351
/*
 * Unicode to ISO-8859-3, in the 2-level layout indexed by UTF8ToISO8859x():
 * 48 first-level bytes (0..31 for 2-byte leads, 32..47 for 3-byte leads)
 * holding block numbers, followed by 7 blocks of 64 bytes. A final value
 * of 0 is reported by UTF8ToISO8859x() as "not in character set".
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2385
/*
 * ISO-8859-4 to Unicode: 128 code points, one per byte value above 0x7F.
 * Shaped for the unicodetable argument of ISO8859xToUTF8(), which reads
 * entry (byte - 0x80) and treats 0x0000 as an undefined code point.
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2404
/*
 * Unicode to ISO-8859-4, in the 2-level layout indexed by UTF8ToISO8859x():
 * 48 first-level bytes (0..31 for 2-byte leads, 32..47 for 3-byte leads)
 * holding block numbers, followed by 6 blocks of 64 bytes. A final value
 * of 0 is reported by UTF8ToISO8859x() as "not in character set".
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2434
/*
 * ISO-8859-5 to Unicode: 128 code points, one per byte value above 0x7F.
 * Shaped for the unicodetable argument of ISO8859xToUTF8(), which reads
 * entry (byte - 0x80) and treats 0x0000 as an undefined code point.
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2453
/*
 * Unicode to ISO-8859-5, in the 2-level layout indexed by UTF8ToISO8859x():
 * 48 first-level bytes (0..31 for 2-byte leads, 32..47 for 3-byte leads)
 * holding block numbers, followed by 6 blocks of 64 bytes. A final value
 * of 0 is reported by UTF8ToISO8859x() as "not in character set".
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2483
/*
 * ISO-8859-6 to Unicode: 128 code points, one per byte value above 0x7F.
 * Shaped for the unicodetable argument of ISO8859xToUTF8(), which reads
 * entry (byte - 0x80) and treats 0x0000 as an undefined code point.
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
2502
/*
 * Unicode to ISO-8859-6, in the 2-level layout indexed by UTF8ToISO8859x():
 * 48 first-level bytes (0..31 for 2-byte leads, 32..47 for 3-byte leads)
 * holding block numbers, followed by 5 blocks of 64 bytes. A final value
 * of 0 is reported by UTF8ToISO8859x() as "not in character set".
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2528
/*
 * ISO-8859-7 to Unicode: 128 code points, one per byte value above 0x7F.
 * Shaped for the unicodetable argument of ISO8859xToUTF8(), which reads
 * entry (byte - 0x80) and treats 0x0000 as an undefined code point.
 * NOTE(review): the call site passing this table is not in view -- confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
2547
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_7().
 * The array is declared as 48 + 7 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks, with 0x00 meaning "no mapping";
 * confirm against UTF8ToISO8859x(), which is not visible here.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2581
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8() by
 * ISO8859_8ToUTF8().  The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): entry i appears to be the Unicode code point for
 * ISO-8859-8 byte 0x80 + i, with 0x0000 marking an unassigned byte;
 * confirm against ISO8859xToUTF8(), which is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
2600
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_8().
 * The array is declared as 48 + 7 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks, with 0x00 meaning "no mapping";
 * confirm against UTF8ToISO8859x(), which is not visible here.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2634
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8() by
 * ISO8859_9ToUTF8().  The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): entry i appears to be the Unicode code point for
 * ISO-8859-9 byte 0x80 + i; confirm against ISO8859xToUTF8(), which is
 * not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
2653
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_9().
 * The array is declared as 48 + 5 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by five 64-byte blocks, with 0x00 meaning "no mapping";
 * confirm against UTF8ToISO8859x(), which is not visible here.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2679
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8() by
 * ISO8859_10ToUTF8().  The first 32 entries are 0x0080..0x009f in order.
 * NOTE(review): entry i appears to be the Unicode code point for
 * ISO-8859-10 byte 0x80 + i; confirm against ISO8859xToUTF8(), which is
 * not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
2698
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte lookup table handed to UTF8ToISO8859x() by UTF8ToISO8859_10().
 * The array is declared as 48 + 7 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): the size expression suggests a 48-byte index header
 * followed by seven 64-byte blocks, with 0x00 meaning "no mapping";
 * confirm against UTF8ToISO8859x(), which is not visible here.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
2732
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128-entry table of 16-bit values; the first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably consumed by ISO8859xToUTF8() through an
 * ISO8859_11ToUTF8() wrapper outside this view, with entry i being the
 * Unicode code point for ISO-8859-11 byte 0x80 + i and 0x0000 marking
 * an unassigned byte; confirm against that code.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
2751
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte lookup table declared as 48 + 6 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): presumably consumed by UTF8ToISO8859x() through a
 * UTF8ToISO8859_11() wrapper outside this view; the size expression
 * suggests a 48-byte index header followed by six 64-byte blocks, with
 * 0x00 meaning "no mapping".  Confirm against UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2781
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry table of 16-bit values; the first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably consumed by ISO8859xToUTF8() through an
 * ISO8859_13ToUTF8() wrapper outside this view, with entry i being the
 * Unicode code point for ISO-8859-13 byte 0x80 + i; confirm against
 * that code.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
2800
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte lookup table declared as 48 + 7 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): presumably consumed by UTF8ToISO8859x() through a
 * UTF8ToISO8859_13() wrapper outside this view; the size expression
 * suggests a 48-byte index header followed by seven 64-byte blocks,
 * with 0x00 meaning "no mapping".  Confirm against UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
2834
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values; the first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably consumed by ISO8859xToUTF8() through an
 * ISO8859_14ToUTF8() wrapper outside this view, with entry i being the
 * Unicode code point for ISO-8859-14 byte 0x80 + i; confirm against
 * that code.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
2853
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte lookup table declared as 48 + 10 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): presumably consumed by UTF8ToISO8859x() through a
 * UTF8ToISO8859_14() wrapper outside this view; the size expression
 * suggests a 48-byte index header followed by ten 64-byte blocks, with
 * 0x00 meaning "no mapping".  Confirm against UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
2899
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values; the first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably consumed by ISO8859xToUTF8() through an
 * ISO8859_15ToUTF8() wrapper outside this view, with entry i being the
 * Unicode code point for ISO-8859-15 byte 0x80 + i; confirm against
 * that code.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
2918
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte lookup table declared as 48 + 6 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): presumably consumed by UTF8ToISO8859x() through a
 * UTF8ToISO8859_15() wrapper outside this view; the size expression
 * suggests a 48-byte index header followed by six 64-byte blocks, with
 * 0x00 meaning "no mapping".  Confirm against UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
2948
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values; the first 32 entries are
 * 0x0080..0x009f in order.
 * NOTE(review): presumably consumed by ISO8859xToUTF8() through an
 * ISO8859_16ToUTF8() wrapper outside this view, with entry i being the
 * Unicode code point for ISO-8859-16 byte 0x80 + i; confirm against
 * that code.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
2967
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte lookup table declared as 48 + 9 * 64 bytes and initialised from
 * string literals of exactly that length, so the literal's terminating
 * NUL is not stored.
 * NOTE(review): presumably consumed by UTF8ToISO8859x() through a
 * UTF8ToISO8859_16() wrapper outside this view; the size expression
 * suggests a 48-byte index header followed by nine 64-byte blocks, with
 * 0x00 meaning "no mapping".  Confirm against UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3009
3010
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 */
3014
3015static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3016 const unsigned char* in, int *inlen) {
3017 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3018}
3019static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3020 const unsigned char* in, int *inlen) {
3021 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3022}
3023
3024static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3025 const unsigned char* in, int *inlen) {
3026 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3027}
3028static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3029 const unsigned char* in, int *inlen) {
3030 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3031}
3032
3033static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3034 const unsigned char* in, int *inlen) {
3035 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3036}
3037static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3038 const unsigned char* in, int *inlen) {
3039 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3040}
3041
3042static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3043 const unsigned char* in, int *inlen) {
3044 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3045}
3046static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3047 const unsigned char* in, int *inlen) {
3048 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3049}
3050
3051static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3052 const unsigned char* in, int *inlen) {
3053 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3054}
3055static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3056 const unsigned char* in, int *inlen) {
3057 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3058}
3059
3060static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3061 const unsigned char* in, int *inlen) {
3062 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3063}
3064static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3065 const unsigned char* in, int *inlen) {
3066 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3067}
3068
3069static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3070 const unsigned char* in, int *inlen) {
3071 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3072}
3073static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3074 const unsigned char* in, int *inlen) {
3075 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3076}
3077
3078static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3079 const unsigned char* in, int *inlen) {
3080 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3081}
3082static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3083 const unsigned char* in, int *inlen) {
3084 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3085}
3086
3087static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3088 const unsigned char* in, int *inlen) {
3089 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3090}
3091static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3092 const unsigned char* in, int *inlen) {
3093 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3094}
3095
3096static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3097 const unsigned char* in, int *inlen) {
3098 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3099}
3100static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3101 const unsigned char* in, int *inlen) {
3102 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3103}
3104
3105static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3106 const unsigned char* in, int *inlen) {
3107 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3108}
3109static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3110 const unsigned char* in, int *inlen) {
3111 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3112}
3113
3114static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3115 const unsigned char* in, int *inlen) {
3116 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3117}
3118static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3119 const unsigned char* in, int *inlen) {
3120 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3121}
3122
3123static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3124 const unsigned char* in, int *inlen) {
3125 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3126}
3127static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3128 const unsigned char* in, int *inlen) {
3129 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3130}
3131
3132static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3133 const unsigned char* in, int *inlen) {
3134 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3135}
3136static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3137 const unsigned char* in, int *inlen) {
3138 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3139}
3140
3141static void
3142xmlRegisterCharEncodingHandlersISO8859x (void) {
3143 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3144 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3145 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3146 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3147 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3148 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3149 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3150 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3151 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3152 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3153 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3154 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3155 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3156 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3157}
3158
3159#endif
3160#endif
3161
3162