blob: 928f3afafcef0e227d7d34534a60867eeeaa430e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
Daniel Veillard97ac1312001-05-30 19:14:17 +000020 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000021 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * Encoding alias table.
 *
 * Each entry maps an alias to the encoding name it stands for. Both strings
 * are heap copies made by xmlAddEncodingAlias() and released by
 * xmlCleanupEncodingAliases(); the alias is stored upper-cased so lookups
 * can compare against an upper-cased key.
 */
50typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52struct _xmlCharEncodingAlias {
/* encoding name the alias resolves to */
53 const char *name;
/* upper-cased alias used as the lookup key */
54 const char *alias;
55};
56
/* Dynamically grown array of alias entries; NULL until the first alias is added. */
57static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
/* Number of entries currently in use in xmlCharEncodingAliases. */
58static int xmlCharEncodingAliasesNb = 0;
/* Number of entries currently allocated in xmlCharEncodingAliases. */
59static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
/*
 * Byte-order flag: non-zero means 16-bit values are treated as being stored
 * little-endian on this host. Initialised to 1 here.
 * NOTE(review): presumably corrected at start-up on big-endian hosts; the
 * code doing so is not visible in this part of the file - confirm.
 */
71static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Every ASCII octet is its own UTF-8 encoding, so
 * the conversion is a validated byte-for-byte copy.
 *
 * Conversion stops early, still reporting success, when @out is full;
 * the caller resumes from the reported @inlen.
 *
 * Returns 0 if success, or -1 if an octet outside the ASCII range
 *         (>= 0x80) is met in @in.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + *inlen;

    /*
     * One input octet yields exactly one output octet, so a single free
     * byte in @out is enough to make progress. No extra slack is needed.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted up to this point */
            *outlen = (int) (out - outstart);
            *inlen = (int) (in - base);
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(0);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is produced or consumed.
 * Conversion stops early, still returning 0, when @out is full or when
 * @in ends in the middle of a multi-byte sequence.
 *
 * Returns 0 if success, or -2 if the transcoding fails (malformed UTF-8
 *         or a character which cannot be represented in ASCII).
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;                /* no chance for this in Ascii */

        /* sequence split across the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte which is not a continuation byte makes the whole
             * sequence malformed; never emit the partially decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto invalid;                 /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(0);

invalid:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000214#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000215
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Octets below 0x80 are copied, the others are
 * expanded to a two-byte UTF-8 sequence.
 *
 * Conversion stops early, still returning 0, as soon as the next
 * character does not fit in the space left in @out.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * Each input octet is only read after checking that it lies inside
     * the input block, so an empty input is never dereferenced and no
     * byte past the end is looked at.
     */
    while (in < inend) {
        c = *in;
        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* two output bytes are needed: emit both or none */
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(0);
}
265
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in the
 * smaller of the two buffers are copied verbatim, without validation.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL or the
 *         number of bytes to copy is negative.
 * The values of *outlen and *inlenb after a successful return are both
 * the number of bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (out == NULL || inb == NULL || outlen == NULL || inlenb == NULL)
        return(-1);

    /* copy whichever is smaller: what is available or what fits */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(0);
}
301
Daniel Veillarde72c7562002-05-31 09:47:30 +0000302
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000303#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is produced or consumed.
 * Conversion stops early, still returning 0, when @out is full or when
 * @in ends in the middle of a multi-byte sequence.
 *
 * Returns 0 if success, or -2 if the transcoding fails (malformed UTF-8
 *         or a character above 0xFF).
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* srcBegin = in;
    const unsigned char* srcEnd;
    const unsigned char* committed = in;   /* end of last fully converted char */
    const unsigned char* dstBegin = out;
    const unsigned char* dstEnd;
    unsigned int lead, code, cont;
    int pending;

    if (in == NULL) {
        /* initialization call, nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcEnd = in + (*inlen);
    dstEnd = out + (*outlen);

    while (in < srcEnd) {
        lead = *in++;

        /* classify the leading byte and count the continuation bytes */
        if (lead < 0x80) {
            code = lead;
            pending = 0;
        } else if (lead < 0xC0) {
            goto failed;        /* continuation byte in leading position */
        } else if (lead < 0xE0) {
            code = lead & 0x1F;
            pending = 1;
        } else if (lead < 0xF0) {
            code = lead & 0x0F;
            pending = 2;
        } else if (lead < 0xF8) {
            code = lead & 0x07;
            pending = 3;
        } else {
            goto failed;        /* no chance for this in IsoLat1 */
        }

        /* the sequence is cut by the end of the input block */
        if (srcEnd - in < pending)
            break;

        while (pending > 0) {
            cont = *in++;
            if ((cont & 0xC0) != 0x80)
                goto failed;
            code = (code << 6) | (cont & 0x3F);
            pending--;
        }

        /* code is now a single UCS-4 value */
        if (code > 0xFF)
            goto failed;        /* no chance for this in IsoLat1 */
        if (out >= dstEnd)
            break;
        *out++ = code;
        committed = in;
    }
    *outlen = out - dstBegin;
    *inlen = committed - srcBegin;
    return(0);

failed:
    *outlen = out - dstBegin;
    *inlen = committed - srcBegin;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000391#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000392
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the byte order of the host nor on the alignment of @inb.
 *
 * Conversion stops early, still returning 0, when the next character does
 * not fit in @out or when the input ends in the middle of a surrogate
 * pair. A character is always written completely or not at all. A trailing
 * odd byte in the input is ignored.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 *         which is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;      /* start of the next unconverted char */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int needed, bits;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (inend - in >= 2) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* low surrogate not available yet */
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) needed = 1;
        else if (c < 0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else needed = 4;

        /* never emit a truncated sequence: check the exact size first */
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
        } else {
            if (c < 0x800) {
                *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
                bits = 0;
            } else if (c < 0x10000) {
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                bits = 6;
            } else {
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                bits = 12;
            }
            for ( ; bits >= 0; bits -= 6)
                *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
        }
        in = next;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(0);
}
480
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000481#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written byte by byte, so the result
 * does not depend on the byte order of the host nor on the alignment
 * of @outb.
 *
 * A NULL @in is an initialization call: nothing is produced or consumed.
 * Conversion stops early, still returning 0, when the next character does
 * not fit in @outb or when @in ends in the middle of a multi-byte sequence.
 *
 * Returns 0 if success, or -2 if the transcoding failed (malformed UTF-8
 *         or a value which cannot be represented in UTF-16).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;                /* no chance for this in UTF-16 */

        /* sequence split across the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte makes the whole sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: store both units or none */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond U+10FFFF: fail instead of silently stalling */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(0);

invalid:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000588#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000589
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * A NULL @in is an initialization call: the little-endian Byte Order Mark
 * (0xFF 0xFE) is written if @outb has room for it. Otherwise the
 * conversion is delegated to UTF8ToUTF16LE().
 *
 * Returns 2 when the Byte Order Mark was written, 0 when an initialization
 *         call found no room for it, else the result of UTF8ToUTF16LE().
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* regular data: same encoding as UTF-16LE */
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /* initialization without enough room for the Byte Order Mark */
    if (*outlen < 2) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    /* initialization, add the Byte Order Mark for UTF-16LE */
    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
628
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the byte order of the host nor on the alignment of @inb.
 *
 * Conversion stops early, still returning 0, when the next character does
 * not fit in @out or when the input ends in the middle of a surrogate
 * pair (same policy as UTF16LEToUTF8). A character is always written
 * completely or not at all. A trailing odd byte in the input is ignored.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 *         which is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;      /* start of the next unconverted char */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int needed, bits;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* low surrogate not available yet */
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) needed = 1;
        else if (c < 0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else needed = 4;

        /*
         * Never emit a truncated sequence: a partially written character
         * must not be reported as consumed input.
         */
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
        } else {
            if (c < 0x800) {
                *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
                bits = 0;
            } else if (c < 0x10000) {
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                bits = 6;
            } else {
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                bits = 12;
            }
            for ( ; bits >= 0; bits -= 6)
                *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
        }
        in = next;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(0);
}
720
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000721#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte, so the result
 * does not depend on the byte order of the host nor on the alignment
 * of @outb.
 *
 * A NULL @in is an initialization call: nothing is produced or consumed.
 * Conversion stops early, still returning 0, when the next character does
 * not fit in @outb or when @in ends in the middle of a multi-byte sequence.
 *
 * Returns 0 if success, or -2 if the transcoding failed (malformed UTF-8
 *         or a value which cannot be represented in UTF-16).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;                /* no chance for this in UTF-16 */

        /* sequence split across the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte makes the whole sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: store both units or none */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond U+10FFFF: fail instead of silently stalling */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(0);

invalid:
    /* sizes are reported in bytes here too, not in 16-bit units */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000825#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000826
Daniel Veillard97ac1312001-05-30 19:14:17 +0000827/************************************************************************
828 * *
829 * Generic encoding handling routines *
830 * *
831 ************************************************************************/
832
Owen Taylor3473f882001-02-23 17:55:21 +0000833/**
834 * xmlDetectCharEncoding:
835 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000836 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000837 * @len: pointer to the length of the buffer
838 *
839 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000840 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000841 *
842 * Returns one of the XML_CHAR_ENCODING_... values.
843 */
844xmlCharEncoding
845xmlDetectCharEncoding(const unsigned char* in, int len)
846{
847 if (len >= 4) {
848 if ((in[0] == 0x00) && (in[1] == 0x00) &&
849 (in[2] == 0x00) && (in[3] == 0x3C))
850 return(XML_CHAR_ENCODING_UCS4BE);
851 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
852 (in[2] == 0x00) && (in[3] == 0x00))
853 return(XML_CHAR_ENCODING_UCS4LE);
854 if ((in[0] == 0x00) && (in[1] == 0x00) &&
855 (in[2] == 0x3C) && (in[3] == 0x00))
856 return(XML_CHAR_ENCODING_UCS4_2143);
857 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
858 (in[2] == 0x00) && (in[3] == 0x00))
859 return(XML_CHAR_ENCODING_UCS4_3412);
860 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
861 (in[2] == 0xA7) && (in[3] == 0x94))
862 return(XML_CHAR_ENCODING_EBCDIC);
863 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
864 (in[2] == 0x78) && (in[3] == 0x6D))
865 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000866 /*
867 * Although not part of the recommendation, we also
868 * attempt an "auto-recognition" of UTF-16LE and
869 * UTF-16BE encodings.
870 */
871 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
872 (in[2] == 0x3F) && (in[3] == 0x00))
873 return(XML_CHAR_ENCODING_UTF16LE);
874 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
875 (in[2] == 0x00) && (in[3] == 0x3F))
876 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000877 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000878 if (len >= 3) {
879 /*
880 * Errata on XML-1.0 June 20 2001
881 * We now allow an UTF8 encoded BOM
882 */
883 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
884 (in[2] == 0xBF))
885 return(XML_CHAR_ENCODING_UTF8);
886 }
William M. Brackf9415e42003-11-28 09:39:10 +0000887 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000888 if (len >= 2) {
889 if ((in[0] == 0xFE) && (in[1] == 0xFF))
890 return(XML_CHAR_ENCODING_UTF16BE);
891 if ((in[0] == 0xFF) && (in[1] == 0xFE))
892 return(XML_CHAR_ENCODING_UTF16LE);
893 }
894 return(XML_CHAR_ENCODING_NONE);
895}
896
897/**
898 * xmlCleanupEncodingAliases:
899 *
900 * Unregisters all aliases
901 */
902void
903xmlCleanupEncodingAliases(void) {
904 int i;
905
906 if (xmlCharEncodingAliases == NULL)
907 return;
908
909 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
910 if (xmlCharEncodingAliases[i].name != NULL)
911 xmlFree((char *) xmlCharEncodingAliases[i].name);
912 if (xmlCharEncodingAliases[i].alias != NULL)
913 xmlFree((char *) xmlCharEncodingAliases[i].alias);
914 }
915 xmlCharEncodingAliasesNb = 0;
916 xmlCharEncodingAliasesMax = 0;
917 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000918 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000919}
920
921/**
922 * xmlGetEncodingAlias:
923 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
924 *
925 * Lookup an encoding name for the given alias.
926 *
William M. Brackf9415e42003-11-28 09:39:10 +0000927 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000928 */
929const char *
930xmlGetEncodingAlias(const char *alias) {
931 int i;
932 char upper[100];
933
934 if (alias == NULL)
935 return(NULL);
936
937 if (xmlCharEncodingAliases == NULL)
938 return(NULL);
939
940 for (i = 0;i < 99;i++) {
941 upper[i] = toupper(alias[i]);
942 if (upper[i] == 0) break;
943 }
944 upper[i] = 0;
945
946 /*
947 * Walk down the list looking for a definition of the alias
948 */
949 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
950 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
951 return(xmlCharEncodingAliases[i].name);
952 }
953 }
954 return(NULL);
955}
956
957/**
958 * xmlAddEncodingAlias:
959 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
960 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
961 *
William M. Brackf9415e42003-11-28 09:39:10 +0000962 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000963 * will be overwritten.
964 *
965 * Returns 0 in case of success, -1 in case of error
966 */
967int
968xmlAddEncodingAlias(const char *name, const char *alias) {
969 int i;
970 char upper[100];
971
972 if ((name == NULL) || (alias == NULL))
973 return(-1);
974
975 for (i = 0;i < 99;i++) {
976 upper[i] = toupper(alias[i]);
977 if (upper[i] == 0) break;
978 }
979 upper[i] = 0;
980
981 if (xmlCharEncodingAliases == NULL) {
982 xmlCharEncodingAliasesNb = 0;
983 xmlCharEncodingAliasesMax = 20;
984 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
985 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
986 if (xmlCharEncodingAliases == NULL)
987 return(-1);
988 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
989 xmlCharEncodingAliasesMax *= 2;
990 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
991 xmlRealloc(xmlCharEncodingAliases,
992 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
993 }
994 /*
995 * Walk down the list looking for a definition of the alias
996 */
997 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
998 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
999 /*
1000 * Replace the definition.
1001 */
1002 xmlFree((char *) xmlCharEncodingAliases[i].name);
1003 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1004 return(0);
1005 }
1006 }
1007 /*
1008 * Add the definition
1009 */
1010 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1011 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1012 xmlCharEncodingAliasesNb++;
1013 return(0);
1014}
1015
1016/**
1017 * xmlDelEncodingAlias:
1018 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1019 *
1020 * Unregisters an encoding alias @alias
1021 *
1022 * Returns 0 in case of success, -1 in case of error
1023 */
1024int
1025xmlDelEncodingAlias(const char *alias) {
1026 int i;
1027
1028 if (alias == NULL)
1029 return(-1);
1030
1031 if (xmlCharEncodingAliases == NULL)
1032 return(-1);
1033 /*
1034 * Walk down the list looking for a definition of the alias
1035 */
1036 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1037 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1038 xmlFree((char *) xmlCharEncodingAliases[i].name);
1039 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1040 xmlCharEncodingAliasesNb--;
1041 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1042 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1043 return(0);
1044 }
1045 }
1046 return(-1);
1047}
1048
1049/**
1050 * xmlParseCharEncoding:
1051 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1052 *
William M. Brackf9415e42003-11-28 09:39:10 +00001053 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001054 * that the comparison is case insensitive accordingly to the section
1055 * [XML] 4.3.3 Character Encoding in Entities.
1056 *
1057 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1058 * if not recognized.
1059 */
1060xmlCharEncoding
1061xmlParseCharEncoding(const char* name)
1062{
1063 const char *alias;
1064 char upper[500];
1065 int i;
1066
1067 if (name == NULL)
1068 return(XML_CHAR_ENCODING_NONE);
1069
1070 /*
1071 * Do the alias resolution
1072 */
1073 alias = xmlGetEncodingAlias(name);
1074 if (alias != NULL)
1075 name = alias;
1076
1077 for (i = 0;i < 499;i++) {
1078 upper[i] = toupper(name[i]);
1079 if (upper[i] == 0) break;
1080 }
1081 upper[i] = 0;
1082
1083 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1084 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1085 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1086
1087 /*
1088 * NOTE: if we were able to parse this, the endianness of UTF16 is
1089 * already found and in use
1090 */
1091 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1092 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1093
1094 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1095 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1096 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1097
1098 /*
1099 * NOTE: if we were able to parse this, the endianness of UCS4 is
1100 * already found and in use
1101 */
1102 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1103 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1104 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1105
1106
1107 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1108 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1109 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1110
1111 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1112 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1113 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1114
1115 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1116 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1117 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1118 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1119 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1120 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1121 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1122
1123 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1124 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1125 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1126
1127#ifdef DEBUG_ENCODING
1128 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1129#endif
1130 return(XML_CHAR_ENCODING_ERROR);
1131}
1132
1133/**
1134 * xmlGetCharEncodingName:
1135 * @enc: the encoding
1136 *
1137 * The "canonical" name for XML encoding.
1138 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1139 * Section 4.3.3 Character Encoding in Entities
1140 *
1141 * Returns the canonical name for the given encoding
1142 */
1143
1144const char*
1145xmlGetCharEncodingName(xmlCharEncoding enc) {
1146 switch (enc) {
1147 case XML_CHAR_ENCODING_ERROR:
1148 return(NULL);
1149 case XML_CHAR_ENCODING_NONE:
1150 return(NULL);
1151 case XML_CHAR_ENCODING_UTF8:
1152 return("UTF-8");
1153 case XML_CHAR_ENCODING_UTF16LE:
1154 return("UTF-16");
1155 case XML_CHAR_ENCODING_UTF16BE:
1156 return("UTF-16");
1157 case XML_CHAR_ENCODING_EBCDIC:
1158 return("EBCDIC");
1159 case XML_CHAR_ENCODING_UCS4LE:
1160 return("ISO-10646-UCS-4");
1161 case XML_CHAR_ENCODING_UCS4BE:
1162 return("ISO-10646-UCS-4");
1163 case XML_CHAR_ENCODING_UCS4_2143:
1164 return("ISO-10646-UCS-4");
1165 case XML_CHAR_ENCODING_UCS4_3412:
1166 return("ISO-10646-UCS-4");
1167 case XML_CHAR_ENCODING_UCS2:
1168 return("ISO-10646-UCS-2");
1169 case XML_CHAR_ENCODING_8859_1:
1170 return("ISO-8859-1");
1171 case XML_CHAR_ENCODING_8859_2:
1172 return("ISO-8859-2");
1173 case XML_CHAR_ENCODING_8859_3:
1174 return("ISO-8859-3");
1175 case XML_CHAR_ENCODING_8859_4:
1176 return("ISO-8859-4");
1177 case XML_CHAR_ENCODING_8859_5:
1178 return("ISO-8859-5");
1179 case XML_CHAR_ENCODING_8859_6:
1180 return("ISO-8859-6");
1181 case XML_CHAR_ENCODING_8859_7:
1182 return("ISO-8859-7");
1183 case XML_CHAR_ENCODING_8859_8:
1184 return("ISO-8859-8");
1185 case XML_CHAR_ENCODING_8859_9:
1186 return("ISO-8859-9");
1187 case XML_CHAR_ENCODING_2022_JP:
1188 return("ISO-2022-JP");
1189 case XML_CHAR_ENCODING_SHIFT_JIS:
1190 return("Shift-JIS");
1191 case XML_CHAR_ENCODING_EUC_JP:
1192 return("EUC-JP");
1193 case XML_CHAR_ENCODING_ASCII:
1194 return(NULL);
1195 }
1196 return(NULL);
1197}
1198
Daniel Veillard97ac1312001-05-30 19:14:17 +00001199/************************************************************************
1200 * *
1201 * Char encoding handlers *
1202 * *
1203 ************************************************************************/
1204
Owen Taylor3473f882001-02-23 17:55:21 +00001205
1206/* the size should be growable, but it's not a big deal ... */
1207#define MAX_ENCODING_HANDLERS 50
1208static xmlCharEncodingHandlerPtr *handlers = NULL;
1209static int nbCharEncodingHandler = 0;
1210
1211/*
1212 * The default is UTF-8 for XML, that's also the default used for the
1213 * parser internals, so the default encoding handler is NULL
1214 */
1215
1216static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1217
1218/**
1219 * xmlNewCharEncodingHandler:
1220 * @name: the encoding name, in UTF-8 format (ASCII actually)
1221 * @input: the xmlCharEncodingInputFunc to read that encoding
1222 * @output: the xmlCharEncodingOutputFunc to write that encoding
1223 *
1224 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001225 *
Owen Taylor3473f882001-02-23 17:55:21 +00001226 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1227 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001228xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001229xmlNewCharEncodingHandler(const char *name,
1230 xmlCharEncodingInputFunc input,
1231 xmlCharEncodingOutputFunc output) {
1232 xmlCharEncodingHandlerPtr handler;
1233 const char *alias;
1234 char upper[500];
1235 int i;
1236 char *up = 0;
1237
1238 /*
1239 * Do the alias resolution
1240 */
1241 alias = xmlGetEncodingAlias(name);
1242 if (alias != NULL)
1243 name = alias;
1244
1245 /*
1246 * Keep only the uppercase version of the encoding.
1247 */
1248 if (name == NULL) {
1249 xmlGenericError(xmlGenericErrorContext,
1250 "xmlNewCharEncodingHandler : no name !\n");
1251 return(NULL);
1252 }
1253 for (i = 0;i < 499;i++) {
1254 upper[i] = toupper(name[i]);
1255 if (upper[i] == 0) break;
1256 }
1257 upper[i] = 0;
1258 up = xmlMemStrdup(upper);
1259 if (up == NULL) {
1260 xmlGenericError(xmlGenericErrorContext,
1261 "xmlNewCharEncodingHandler : out of memory !\n");
1262 return(NULL);
1263 }
1264
1265 /*
1266 * allocate and fill-up an handler block.
1267 */
1268 handler = (xmlCharEncodingHandlerPtr)
1269 xmlMalloc(sizeof(xmlCharEncodingHandler));
1270 if (handler == NULL) {
1271 xmlGenericError(xmlGenericErrorContext,
1272 "xmlNewCharEncodingHandler : out of memory !\n");
1273 return(NULL);
1274 }
1275 handler->input = input;
1276 handler->output = output;
1277 handler->name = up;
1278
1279#ifdef LIBXML_ICONV_ENABLED
1280 handler->iconv_in = NULL;
1281 handler->iconv_out = NULL;
1282#endif /* LIBXML_ICONV_ENABLED */
1283
1284 /*
1285 * registers and returns the handler.
1286 */
1287 xmlRegisterCharEncodingHandler(handler);
1288#ifdef DEBUG_ENCODING
1289 xmlGenericError(xmlGenericErrorContext,
1290 "Registered encoding handler for %s\n", name);
1291#endif
1292 return(handler);
1293}
1294
1295/**
1296 * xmlInitCharEncodingHandlers:
1297 *
1298 * Initialize the char encoding support, it registers the default
1299 * encoding supported.
1300 * NOTE: while public, this function usually doesn't need to be called
1301 * in normal processing.
1302 */
1303void
1304xmlInitCharEncodingHandlers(void) {
1305 unsigned short int tst = 0x1234;
1306 unsigned char *ptr = (unsigned char *) &tst;
1307
1308 if (handlers != NULL) return;
1309
1310 handlers = (xmlCharEncodingHandlerPtr *)
1311 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1312
1313 if (*ptr == 0x12) xmlLittleEndian = 0;
1314 else if (*ptr == 0x34) xmlLittleEndian = 1;
1315 else xmlGenericError(xmlGenericErrorContext,
1316 "Odd problem at endianness detection\n");
1317
1318 if (handlers == NULL) {
1319 xmlGenericError(xmlGenericErrorContext,
1320 "xmlInitCharEncodingHandlers : out of memory !\n");
1321 return;
1322 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001323 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001324#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001325 xmlUTF16LEHandler =
1326 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1327 xmlUTF16BEHandler =
1328 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001329 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001330 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1331 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001332 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001333#ifdef LIBXML_HTML_ENABLED
1334 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1335#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001336#else
1337 xmlUTF16LEHandler =
1338 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1339 xmlUTF16BEHandler =
1340 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001341 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001342 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1343 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1344 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1345#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001346#ifndef LIBXML_ICONV_ENABLED
1347#ifdef LIBXML_ISO8859X_ENABLED
1348 xmlRegisterCharEncodingHandlersISO8859x ();
1349#endif
1350#endif
1351
Owen Taylor3473f882001-02-23 17:55:21 +00001352}
1353
1354/**
1355 * xmlCleanupCharEncodingHandlers:
1356 *
1357 * Cleanup the memory allocated for the char encoding support, it
1358 * unregisters all the encoding handlers and the aliases.
1359 */
1360void
1361xmlCleanupCharEncodingHandlers(void) {
1362 xmlCleanupEncodingAliases();
1363
1364 if (handlers == NULL) return;
1365
1366 for (;nbCharEncodingHandler > 0;) {
1367 nbCharEncodingHandler--;
1368 if (handlers[nbCharEncodingHandler] != NULL) {
1369 if (handlers[nbCharEncodingHandler]->name != NULL)
1370 xmlFree(handlers[nbCharEncodingHandler]->name);
1371 xmlFree(handlers[nbCharEncodingHandler]);
1372 }
1373 }
1374 xmlFree(handlers);
1375 handlers = NULL;
1376 nbCharEncodingHandler = 0;
1377 xmlDefaultCharEncodingHandler = NULL;
1378}
1379
1380/**
1381 * xmlRegisterCharEncodingHandler:
1382 * @handler: the xmlCharEncodingHandlerPtr handler block
1383 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001384 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001385 */
1386void
1387xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1388 if (handlers == NULL) xmlInitCharEncodingHandlers();
1389 if (handler == NULL) {
1390 xmlGenericError(xmlGenericErrorContext,
1391 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1392 return;
1393 }
1394
1395 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1396 xmlGenericError(xmlGenericErrorContext,
1397 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1398 xmlGenericError(xmlGenericErrorContext,
1399 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1400 return;
1401 }
1402 handlers[nbCharEncodingHandler++] = handler;
1403}
1404
1405/**
1406 * xmlGetCharEncodingHandler:
1407 * @enc: an xmlCharEncoding value.
1408 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001409 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001410 *
1411 * Returns the handler or NULL if not found
1412 */
1413xmlCharEncodingHandlerPtr
1414xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1415 xmlCharEncodingHandlerPtr handler;
1416
1417 if (handlers == NULL) xmlInitCharEncodingHandlers();
1418 switch (enc) {
1419 case XML_CHAR_ENCODING_ERROR:
1420 return(NULL);
1421 case XML_CHAR_ENCODING_NONE:
1422 return(NULL);
1423 case XML_CHAR_ENCODING_UTF8:
1424 return(NULL);
1425 case XML_CHAR_ENCODING_UTF16LE:
1426 return(xmlUTF16LEHandler);
1427 case XML_CHAR_ENCODING_UTF16BE:
1428 return(xmlUTF16BEHandler);
1429 case XML_CHAR_ENCODING_EBCDIC:
1430 handler = xmlFindCharEncodingHandler("EBCDIC");
1431 if (handler != NULL) return(handler);
1432 handler = xmlFindCharEncodingHandler("ebcdic");
1433 if (handler != NULL) return(handler);
1434 break;
1435 case XML_CHAR_ENCODING_UCS4BE:
1436 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1437 if (handler != NULL) return(handler);
1438 handler = xmlFindCharEncodingHandler("UCS-4");
1439 if (handler != NULL) return(handler);
1440 handler = xmlFindCharEncodingHandler("UCS4");
1441 if (handler != NULL) return(handler);
1442 break;
1443 case XML_CHAR_ENCODING_UCS4LE:
1444 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1445 if (handler != NULL) return(handler);
1446 handler = xmlFindCharEncodingHandler("UCS-4");
1447 if (handler != NULL) return(handler);
1448 handler = xmlFindCharEncodingHandler("UCS4");
1449 if (handler != NULL) return(handler);
1450 break;
1451 case XML_CHAR_ENCODING_UCS4_2143:
1452 break;
1453 case XML_CHAR_ENCODING_UCS4_3412:
1454 break;
1455 case XML_CHAR_ENCODING_UCS2:
1456 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1457 if (handler != NULL) return(handler);
1458 handler = xmlFindCharEncodingHandler("UCS-2");
1459 if (handler != NULL) return(handler);
1460 handler = xmlFindCharEncodingHandler("UCS2");
1461 if (handler != NULL) return(handler);
1462 break;
1463
1464 /*
1465 * We used to keep ISO Latin encodings native in the
1466 * generated data. This led to so many problems that
1467 * this has been removed. One can still change this
1468 * back by registering no-ops encoders for those
1469 */
1470 case XML_CHAR_ENCODING_8859_1:
1471 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1472 if (handler != NULL) return(handler);
1473 break;
1474 case XML_CHAR_ENCODING_8859_2:
1475 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1476 if (handler != NULL) return(handler);
1477 break;
1478 case XML_CHAR_ENCODING_8859_3:
1479 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1480 if (handler != NULL) return(handler);
1481 break;
1482 case XML_CHAR_ENCODING_8859_4:
1483 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1484 if (handler != NULL) return(handler);
1485 break;
1486 case XML_CHAR_ENCODING_8859_5:
1487 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1488 if (handler != NULL) return(handler);
1489 break;
1490 case XML_CHAR_ENCODING_8859_6:
1491 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1492 if (handler != NULL) return(handler);
1493 break;
1494 case XML_CHAR_ENCODING_8859_7:
1495 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1496 if (handler != NULL) return(handler);
1497 break;
1498 case XML_CHAR_ENCODING_8859_8:
1499 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1500 if (handler != NULL) return(handler);
1501 break;
1502 case XML_CHAR_ENCODING_8859_9:
1503 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1504 if (handler != NULL) return(handler);
1505 break;
1506
1507
1508 case XML_CHAR_ENCODING_2022_JP:
1509 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1510 if (handler != NULL) return(handler);
1511 break;
1512 case XML_CHAR_ENCODING_SHIFT_JIS:
1513 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1514 if (handler != NULL) return(handler);
1515 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1516 if (handler != NULL) return(handler);
1517 handler = xmlFindCharEncodingHandler("Shift_JIS");
1518 if (handler != NULL) return(handler);
1519 break;
1520 case XML_CHAR_ENCODING_EUC_JP:
1521 handler = xmlFindCharEncodingHandler("EUC-JP");
1522 if (handler != NULL) return(handler);
1523 break;
1524 default:
1525 break;
1526 }
1527
1528#ifdef DEBUG_ENCODING
1529 xmlGenericError(xmlGenericErrorContext,
1530 "No handler found for encoding %d\n", enc);
1531#endif
1532 return(NULL);
1533}
1534
1535/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001536 * xmlFindCharEncodingHandler:
1537 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001538 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001539 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001540 *
1541 * Returns the handler or NULL if not found
1542 */
1543xmlCharEncodingHandlerPtr
1544xmlFindCharEncodingHandler(const char *name) {
1545 const char *nalias;
1546 const char *norig;
1547 xmlCharEncoding alias;
1548#ifdef LIBXML_ICONV_ENABLED
1549 xmlCharEncodingHandlerPtr enc;
1550 iconv_t icv_in, icv_out;
1551#endif /* LIBXML_ICONV_ENABLED */
1552 char upper[100];
1553 int i;
1554
1555 if (handlers == NULL) xmlInitCharEncodingHandlers();
1556 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1557 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1558
1559 /*
1560 * Do the alias resolution
1561 */
1562 norig = name;
1563 nalias = xmlGetEncodingAlias(name);
1564 if (nalias != NULL)
1565 name = nalias;
1566
1567 /*
1568 * Check first for directly registered encoding names
1569 */
1570 for (i = 0;i < 99;i++) {
1571 upper[i] = toupper(name[i]);
1572 if (upper[i] == 0) break;
1573 }
1574 upper[i] = 0;
1575
1576 for (i = 0;i < nbCharEncodingHandler; i++)
1577 if (!strcmp(upper, handlers[i]->name)) {
1578#ifdef DEBUG_ENCODING
1579 xmlGenericError(xmlGenericErrorContext,
1580 "Found registered handler for encoding %s\n", name);
1581#endif
1582 return(handlers[i]);
1583 }
1584
1585#ifdef LIBXML_ICONV_ENABLED
1586 /* check whether iconv can handle this */
1587 icv_in = iconv_open("UTF-8", name);
1588 icv_out = iconv_open(name, "UTF-8");
1589 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1590 enc = (xmlCharEncodingHandlerPtr)
1591 xmlMalloc(sizeof(xmlCharEncodingHandler));
1592 if (enc == NULL) {
1593 iconv_close(icv_in);
1594 iconv_close(icv_out);
1595 return(NULL);
1596 }
1597 enc->name = xmlMemStrdup(name);
1598 enc->input = NULL;
1599 enc->output = NULL;
1600 enc->iconv_in = icv_in;
1601 enc->iconv_out = icv_out;
1602#ifdef DEBUG_ENCODING
1603 xmlGenericError(xmlGenericErrorContext,
1604 "Found iconv handler for encoding %s\n", name);
1605#endif
1606 return enc;
1607 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1608 xmlGenericError(xmlGenericErrorContext,
1609 "iconv : problems with filters for '%s'\n", name);
1610 }
1611#endif /* LIBXML_ICONV_ENABLED */
1612
1613#ifdef DEBUG_ENCODING
1614 xmlGenericError(xmlGenericErrorContext,
1615 "No handler found for encoding %s\n", name);
1616#endif
1617
1618 /*
1619 * Fallback using the canonical names
1620 */
1621 alias = xmlParseCharEncoding(norig);
1622 if (alias != XML_CHAR_ENCODING_ERROR) {
1623 const char* canon;
1624 canon = xmlGetCharEncodingName(alias);
1625 if ((canon != NULL) && (strcmp(name, canon))) {
1626 return(xmlFindCharEncodingHandler(canon));
1627 }
1628 }
1629
William M. Brackf9415e42003-11-28 09:39:10 +00001630 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001631 return(NULL);
1632}
1633
Daniel Veillard97ac1312001-05-30 19:14:17 +00001634/************************************************************************
1635 * *
1636 * ICONV based generic conversion functions *
1637 * *
1638 ************************************************************************/
1639
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion and maps its outcome to the return codes
 * used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed
 *     and the value of @outlen is the number of octets written to @out;
 *     both are set to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    size_t inLeft = *inlen;
    size_t outLeft = *outlen;
    const char *src = (const char *) in;
    char *dst = (char *) out;
    int status;

    status = iconv(cd, (char **) &src, &inLeft, &dst, &outLeft);

    /* turn the "bytes left" counters into "bytes processed" */
    if (in == NULL) {
        *inlen = 0;
        *outlen = 0;
    } else {
        *inlen -= inLeft;
        *outlen -= outLeft;
    }

    if ((inLeft == 0) && (status != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (incomplete input sequence) and any other failure */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1700
Daniel Veillard97ac1312001-05-30 19:14:17 +00001701/************************************************************************
1702 * *
1703 * The real API used by libxml for on-the-fly conversion *
1704 * *
1705 ************************************************************************/
1706
Owen Taylor3473f882001-02-23 17:55:21 +00001707/**
1708 * xmlCharEncFirstLine:
1709 * @handler: char enconding transformation data structure
1710 * @out: an xmlBuffer for the output.
1711 * @in: an xmlBuffer for the input
1712 *
1713 * Front-end for the encoding handler input function, but handle only
1714 * the very first line, i.e. limit itself to 45 chars.
1715 *
1716 * Returns the number of byte written if success, or
1717 * -1 general error
1718 * -2 if the transcoding fails (for *in is not valid utf8 string or
1719 * the result of transformation can't fit into the encoding we want), or
1720 */
1721int
1722xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1723 xmlBufferPtr in) {
1724 int ret = -2;
1725 int written;
1726 int toconv;
1727
1728 if (handler == NULL) return(-1);
1729 if (out == NULL) return(-1);
1730 if (in == NULL) return(-1);
1731
1732 written = out->size - out->use;
1733 toconv = in->use;
1734 if (toconv * 2 >= written) {
1735 xmlBufferGrow(out, toconv);
1736 written = out->size - out->use - 1;
1737 }
1738
1739 /*
1740 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1741 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001742 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001743 */
1744 written = 45;
1745
1746 if (handler->input != NULL) {
1747 ret = handler->input(&out->content[out->use], &written,
1748 in->content, &toconv);
1749 xmlBufferShrink(in, toconv);
1750 out->use += written;
1751 out->content[out->use] = 0;
1752 }
1753#ifdef LIBXML_ICONV_ENABLED
1754 else if (handler->iconv_in != NULL) {
1755 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1756 &written, in->content, &toconv);
1757 xmlBufferShrink(in, toconv);
1758 out->use += written;
1759 out->content[out->use] = 0;
1760 if (ret == -1) ret = -3;
1761 }
1762#endif /* LIBXML_ICONV_ENABLED */
1763#ifdef DEBUG_ENCODING
1764 switch (ret) {
1765 case 0:
1766 xmlGenericError(xmlGenericErrorContext,
1767 "converted %d bytes to %d bytes of input\n",
1768 toconv, written);
1769 break;
1770 case -1:
1771 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1772 toconv, written, in->use);
1773 break;
1774 case -2:
1775 xmlGenericError(xmlGenericErrorContext,
1776 "input conversion failed due to input error\n");
1777 break;
1778 case -3:
1779 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1780 toconv, written, in->use);
1781 break;
1782 default:
1783 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1784 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001785#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001786 /*
1787 * Ignore when input buffer is not on a boundary
1788 */
1789 if (ret == -3) ret = 0;
1790 if (ret == -1) ret = 0;
1791 return(ret);
1792}
1793
1794/**
1795 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001796 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001797 * @out: an xmlBuffer for the output.
1798 * @in: an xmlBuffer for the input
1799 *
1800 * Generic front-end for the encoding handler input function
1801 *
1802 * Returns the number of byte written if success, or
1803 * -1 general error
1804 * -2 if the transcoding fails (for *in is not valid utf8 string or
1805 * the result of transformation can't fit into the encoding we want), or
1806 */
1807int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001808xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1809 xmlBufferPtr in)
1810{
Owen Taylor3473f882001-02-23 17:55:21 +00001811 int ret = -2;
1812 int written;
1813 int toconv;
1814
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001815 if (handler == NULL)
1816 return (-1);
1817 if (out == NULL)
1818 return (-1);
1819 if (in == NULL)
1820 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001821
1822 toconv = in->use;
1823 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001824 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001825 written = out->size - out->use;
1826 if (toconv * 2 >= written) {
1827 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001828 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001829 }
1830 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001831 ret = handler->input(&out->content[out->use], &written,
1832 in->content, &toconv);
1833 xmlBufferShrink(in, toconv);
1834 out->use += written;
1835 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001836 }
1837#ifdef LIBXML_ICONV_ENABLED
1838 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001839 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1840 &written, in->content, &toconv);
1841 xmlBufferShrink(in, toconv);
1842 out->use += written;
1843 out->content[out->use] = 0;
1844 if (ret == -1)
1845 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001846 }
1847#endif /* LIBXML_ICONV_ENABLED */
1848 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001849 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001850#ifdef DEBUG_ENCODING
1851 xmlGenericError(xmlGenericErrorContext,
1852 "converted %d bytes to %d bytes of input\n",
1853 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001854#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001855 break;
1856 case -1:
1857#ifdef DEBUG_ENCODING
1858 xmlGenericError(xmlGenericErrorContext,
1859 "converted %d bytes to %d bytes of input, %d left\n",
1860 toconv, written, in->use);
1861#endif
1862 break;
1863 case -3:
1864#ifdef DEBUG_ENCODING
1865 xmlGenericError(xmlGenericErrorContext,
1866 "converted %d bytes to %d bytes of input, %d left\n",
1867 toconv, written, in->use);
1868#endif
1869 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001870 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001871 xmlGenericError(xmlGenericErrorContext,
1872 "input conversion failed due to input error\n");
1873 xmlGenericError(xmlGenericErrorContext,
1874 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1875 in->content[0], in->content[1],
1876 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001877 }
1878 /*
1879 * Ignore when input buffer is not on a boundary
1880 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001881 if (ret == -3)
1882 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001883 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001884}
1885
1886/**
1887 * xmlCharEncOutFunc:
1888 * @handler: char enconding transformation data structure
1889 * @out: an xmlBuffer for the output.
1890 * @in: an xmlBuffer for the input
1891 *
1892 * Generic front-end for the encoding handler output function
1893 * a first call with @in == NULL has to be made firs to initiate the
1894 * output in case of non-stateless encoding needing to initiate their
1895 * state or the output (like the BOM in UTF16).
1896 * In case of UTF8 sequence conversion errors for the given encoder,
1897 * the content will be automatically remapped to a CharRef sequence.
1898 *
1899 * Returns the number of byte written if success, or
1900 * -1 general error
1901 * -2 if the transcoding fails (for *in is not valid utf8 string or
1902 * the result of transformation can't fit into the encoding we want), or
1903 */
1904int
1905xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1906 xmlBufferPtr in) {
1907 int ret = -2;
1908 int written;
1909 int writtentot = 0;
1910 int toconv;
1911 int output = 0;
1912
1913 if (handler == NULL) return(-1);
1914 if (out == NULL) return(-1);
1915
1916retry:
1917
1918 written = out->size - out->use;
1919
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001920 if (written > 0)
1921 written--; /* Gennady: count '/0' */
1922
Owen Taylor3473f882001-02-23 17:55:21 +00001923 /*
1924 * First specific handling of in = NULL, i.e. the initialization call
1925 */
1926 if (in == NULL) {
1927 toconv = 0;
1928 if (handler->output != NULL) {
1929 ret = handler->output(&out->content[out->use], &written,
1930 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001931 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001932 out->use += written;
1933 out->content[out->use] = 0;
1934 }
Owen Taylor3473f882001-02-23 17:55:21 +00001935 }
1936#ifdef LIBXML_ICONV_ENABLED
1937 else if (handler->iconv_out != NULL) {
1938 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1939 &written, NULL, &toconv);
1940 out->use += written;
1941 out->content[out->use] = 0;
1942 }
1943#endif /* LIBXML_ICONV_ENABLED */
1944#ifdef DEBUG_ENCODING
1945 xmlGenericError(xmlGenericErrorContext,
1946 "initialized encoder\n");
1947#endif
1948 return(0);
1949 }
1950
1951 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001952 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001953 */
1954 toconv = in->use;
1955 if (toconv == 0)
1956 return(0);
1957 if (toconv * 2 >= written) {
1958 xmlBufferGrow(out, toconv * 2);
1959 written = out->size - out->use - 1;
1960 }
1961 if (handler->output != NULL) {
1962 ret = handler->output(&out->content[out->use], &written,
1963 in->content, &toconv);
1964 xmlBufferShrink(in, toconv);
1965 out->use += written;
1966 writtentot += written;
1967 out->content[out->use] = 0;
1968 }
1969#ifdef LIBXML_ICONV_ENABLED
1970 else if (handler->iconv_out != NULL) {
1971 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1972 &written, in->content, &toconv);
1973 xmlBufferShrink(in, toconv);
1974 out->use += written;
1975 writtentot += written;
1976 out->content[out->use] = 0;
1977 if (ret == -1) {
1978 if (written > 0) {
1979 /*
1980 * Can be a limitation of iconv
1981 */
1982 goto retry;
1983 }
1984 ret = -3;
1985 }
1986 }
1987#endif /* LIBXML_ICONV_ENABLED */
1988 else {
1989 xmlGenericError(xmlGenericErrorContext,
1990 "xmlCharEncOutFunc: no output function !\n");
1991 return(-1);
1992 }
1993
1994 if (ret >= 0) output += ret;
1995
1996 /*
1997 * Attempt to handle error cases
1998 */
1999 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002000 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002001#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002002 xmlGenericError(xmlGenericErrorContext,
2003 "converted %d bytes to %d bytes of output\n",
2004 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002005#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002006 break;
2007 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002008#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002009 xmlGenericError(xmlGenericErrorContext,
2010 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002011#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002012 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002013 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002014#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002015 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2016 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002017#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002018 break;
2019 case -2: {
2020 int len = in->use;
2021 const xmlChar *utf = (const xmlChar *) in->content;
2022 int cur;
2023
2024 cur = xmlGetUTF8Char(utf, &len);
2025 if (cur > 0) {
2026 xmlChar charref[20];
2027
2028#ifdef DEBUG_ENCODING
2029 xmlGenericError(xmlGenericErrorContext,
2030 "handling output conversion error\n");
2031 xmlGenericError(xmlGenericErrorContext,
2032 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2033 in->content[0], in->content[1],
2034 in->content[2], in->content[3]);
2035#endif
2036 /*
2037 * Removes the UTF8 sequence, and replace it by a charref
2038 * and continue the transcoding phase, hoping the error
2039 * did not mangle the encoder state.
2040 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002041 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002042 xmlBufferShrink(in, len);
2043 xmlBufferAddHead(in, charref, -1);
2044
2045 goto retry;
2046 } else {
2047 xmlGenericError(xmlGenericErrorContext,
2048 "output conversion failed due to conv error\n");
2049 xmlGenericError(xmlGenericErrorContext,
2050 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2051 in->content[0], in->content[1],
2052 in->content[2], in->content[3]);
2053 in->content[0] = ' ';
2054 }
2055 break;
2056 }
2057 }
2058 return(ret);
2059}
2060
2061/**
2062 * xmlCharEncCloseFunc:
2063 * @handler: char enconding transformation data structure
2064 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002065 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002066 *
2067 * Returns 0 if success, or -1 in case of error
2068 */
2069int
2070xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2071 int ret = 0;
2072 if (handler == NULL) return(-1);
2073 if (handler->name == NULL) return(-1);
2074#ifdef LIBXML_ICONV_ENABLED
2075 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002076 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002077 * and the associated icon resources.
2078 */
2079 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2080 if (handler->name != NULL)
2081 xmlFree(handler->name);
2082 handler->name = NULL;
2083 if (handler->iconv_out != NULL) {
2084 if (iconv_close(handler->iconv_out))
2085 ret = -1;
2086 handler->iconv_out = NULL;
2087 }
2088 if (handler->iconv_in != NULL) {
2089 if (iconv_close(handler->iconv_in))
2090 ret = -1;
2091 handler->iconv_in = NULL;
2092 }
2093 xmlFree(handler);
2094 }
2095#endif /* LIBXML_ICONV_ENABLED */
2096#ifdef DEBUG_ENCODING
2097 if (ret)
2098 xmlGenericError(xmlGenericErrorContext,
2099 "failed to close the encoding handler\n");
2100 else
2101 xmlGenericError(xmlGenericErrorContext,
2102 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002103#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002104
Owen Taylor3473f882001-02-23 17:55:21 +00002105 return(ret);
2106}
2107
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002108#ifndef LIBXML_ICONV_ENABLED
2109#ifdef LIBXML_ISO8859X_ENABLED
2110
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * @xlattable layout: bytes [0..31] are indexed by the low 5 bits of a
 * 2-byte lead, bytes [32..47] by the low 4 bits of a 3-byte lead; both give
 * the number of a 64-byte block stored from offset 48 on. A block is
 * indexed by the low 6 bits of a trailing byte and yields either the next
 * block number (first trailing byte of a 3-byte sequence) or the output
 * byte. A 0 output byte means the character is not in the target set.
 *
 * A call with @in == NULL is the initialization call and does nothing.
 * Conversion stops early, still returning 0, when @out is full.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed or
 * truncated UTF-8, or a character that is not in the target set).
 * The value of @inlen after return is the number of octets consumed;
 * on failure it is the offset of the first octet of the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    /* start of the first sequence that has not been fully converted */
    const unsigned char* processed = in;
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every accepted sequence produces exactly one output byte */
        if (out >= outend)
            break;
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -2;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte (must look like 10xxxxxx) */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -2;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(ret);
}
2223
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128 entries giving the code point of bytes 0x80..0xFF
 *                 (index is byte - 0x80), 0 meaning undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * looked up in @unicodetable and emitted as a 2 or 3 byte sequence.
 * Conversion stops early, still returning 0, when the next character
 * does not fit in the room left in @out.
 *
 * Returns 0 if success, or -1 otherwise (NULL argument, or input byte
 * whose table entry is 0)
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (unicodetable == NULL))
        return (-1);
    if (in == NULL) {
        /* nothing can have been consumed or produced */
        *outlen = 0;
        *inlen = 0;
        return (-1);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        /* only read a byte once it is known to be inside the input */
        unsigned int c = *in;
        int needed;

        if (c < 0x80) {
            needed = 1;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            needed = (c < 0x800) ? 2 : 3;
        }

        /* never start a sequence that cannot be written completely */
        if (outend - out < needed)
            break;

        if (needed == 1) {
            *out++ = c;
        } else if (needed == 2) {
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        ++in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (0);
}
2283
2284
2285/************************************************************************
2286 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2287 ************************************************************************/
2288
/*
 * ISO-8859-2 to Unicode lookup table: 128 16-bit code points, one per
 * byte value 0x80..0xFF (entry index = byte - 0x80). The first 32 entries
 * (0x80..0x9F) map to the code point of the same value.
 * NOTE(review): laid out for the @unicodetable argument of
 * ISO8859xToUTF8(); the call site is outside this view - confirm.
 */
2289static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2290 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2291 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2292 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2293 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2294 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2295 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2296 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2297 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2298 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2299 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2300 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2301 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2302 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2303 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2304 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2305 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2306};
2307
/*
 * Two-level UTF-8 to ISO-8859-2 lookup table: a 48 byte header followed by
 * 6 blocks of 64 bytes, written as one string literal that fills the array
 * exactly (no terminating 0 is stored).
 * The layout is the one UTF8ToISO8859x() indexes through @xlattable:
 * header bytes 0..31 and 32..47 hold block numbers selected by the lead
 * byte, each block is indexed by the low 6 bits of a trailing byte, and a
 * 0 result means "not in character set".
 * NOTE(review): the call site passing this table is outside this view.
 */
2308static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2309 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2316 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2317 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2318 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2319 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2320 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2321 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2324 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2325 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2327 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2328 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2329 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2330 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2331 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2332 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2333 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2334 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2335 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2336};
2337
/*
 * ISO-8859-3 to Unicode lookup table: 128 16-bit code points, one per
 * byte value 0x80..0xFF (entry index = byte - 0x80). Several entries are
 * 0x0000; ISO8859xToUTF8() treats a 0 entry as an undefined code point.
 * NOTE(review): laid out for the @unicodetable argument of
 * ISO8859xToUTF8(); the call site is outside this view - confirm.
 */
2338static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2339 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2340 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2341 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2342 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2343 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2344 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2345 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2346 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2347 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2348 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2349 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2350 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2351 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2352 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2353 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2354 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2355};
2356
/*
 * Two-level UTF-8 to ISO-8859-3 lookup table: a 48 byte header followed by
 * 7 blocks of 64 bytes, written as one string literal that fills the array
 * exactly (no terminating 0 is stored).
 * The layout is the one UTF8ToISO8859x() indexes through @xlattable:
 * header bytes 0..31 and 32..47 hold block numbers selected by the lead
 * byte, each block is indexed by the low 6 bits of a trailing byte, and a
 * 0 result means "not in character set".
 * NOTE(review): the call site passing this table is outside this view.
 */
2357static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2358 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2359 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2365 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2366 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2367 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2368 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2369 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2371 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2372 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2373 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2374 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2375 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2376 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2377 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2385 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2386 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2387 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2388 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2389};
2390
/*
 * ISO-8859-4 to Unicode lookup table: 128 16-bit code points, one per
 * byte value 0x80..0xFF (entry index = byte - 0x80). The first 32 entries
 * (0x80..0x9F) map to the code point of the same value.
 * NOTE(review): laid out for the @unicodetable argument of
 * ISO8859xToUTF8(); the call site is outside this view - confirm.
 */
2391static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2392 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2393 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2394 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2395 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2396 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2397 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2398 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2399 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2400 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2401 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2402 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2403 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2404 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2405 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2406 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2407 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2408};
2409
/*
 * Two-level UTF-8 to ISO-8859-4 lookup table: a 48 byte header followed by
 * 6 blocks of 64 bytes, written as one string literal that fills the array
 * exactly (no terminating 0 is stored).
 * The layout is the one UTF8ToISO8859x() indexes through @xlattable:
 * header bytes 0..31 and 32..47 hold block numbers selected by the lead
 * byte, each block is indexed by the low 6 bits of a trailing byte, and a
 * 0 result means "not in character set".
 * NOTE(review): the call site passing this table is outside this view.
 */
2410static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2411 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2417 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2418 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2419 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2420 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2421 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2422 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2423 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2424 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2425 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2426 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2427 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2428 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2429 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2430 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2434 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2435 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2436 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2437 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2438};
2439
/*
 * ISO-8859-5 to Unicode lookup table: 128 16-bit code points, one per
 * byte value 0x80..0xFF (entry index = byte - 0x80). The first 32 entries
 * (0x80..0x9F) map to the code point of the same value.
 * NOTE(review): laid out for the @unicodetable argument of
 * ISO8859xToUTF8(); the call site is outside this view - confirm.
 */
2440static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2441 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2442 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2443 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2444 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2445 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2446 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2447 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2448 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2449 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2450 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2451 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2452 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2453 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2454 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2455 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2456 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2457};
2458
/*
 * Two-level UTF-8 to ISO-8859-5 lookup table: a 48 byte header followed by
 * 6 blocks of 64 bytes, written as one string literal that fills the array
 * exactly (no terminating 0 is stored).
 * The layout is the one UTF8ToISO8859x() indexes through @xlattable:
 * header bytes 0..31 and 32..47 hold block numbers selected by the lead
 * byte, each block is indexed by the low 6 bits of a trailing byte, and a
 * 0 result means "not in character set".
 * NOTE(review): the call site passing this table is outside this view.
 */
2459static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2460 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2461 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2462 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2464 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2465 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2466 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2467 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2468 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2469 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2470 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2471 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2472 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2473 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2474 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2475 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2476 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2479 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2482 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2484 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2487};
2488
/*
 * ISO-8859-6 to Unicode lookup table: 128 16-bit code points, one per
 * byte value 0x80..0xFF (entry index = byte - 0x80). Many entries are
 * 0x0000; ISO8859xToUTF8() treats a 0 entry as an undefined code point.
 * NOTE(review): laid out for the @unicodetable argument of
 * ISO8859xToUTF8(); the call site is outside this view - confirm.
 */
2489static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2490 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2491 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2492 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2493 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2494 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2495 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2496 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2497 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2498 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2499 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2500 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2501 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2502 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2503 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2504 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2505 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2506};
2507
/*
 * Two-level UTF-8 to ISO-8859-6 lookup table: a 48 byte header followed by
 * 5 blocks of 64 bytes, written as one string literal that fills the array
 * exactly (no terminating 0 is stored).
 * The layout is the one UTF8ToISO8859x() indexes through @xlattable:
 * header bytes 0..31 and 32..47 hold block numbers selected by the lead
 * byte, each block is indexed by the low 6 bits of a trailing byte, and a
 * 0 result means "not in character set".
 * NOTE(review): the call site passing this table is outside this view.
 */
2508static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2509 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2510 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2515 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2516 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2517 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2518 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2520 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2522 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2523 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2524 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2526 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2527 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2528 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2529 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2532};
2533
/*
 * ISO-8859-7 to Unicode lookup table: 128 16-bit code points, one per
 * byte value 0x80..0xFF (entry index = byte - 0x80). Some entries are
 * 0x0000; ISO8859xToUTF8() treats a 0 entry as an undefined code point.
 * NOTE(review): laid out for the @unicodetable argument of
 * ISO8859xToUTF8(); the call site is outside this view - confirm.
 */
2534static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2535 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2536 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2537 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2538 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2539 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2540 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2541 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2542 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2543 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2544 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2545 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2546 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2547 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2548 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2549 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2550 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2551};
2552
/*
 * Two-level UTF-8 to ISO-8859-7 lookup table: a 48 byte header followed by
 * 7 blocks of 64 bytes, written as one string literal that fills the array
 * exactly (no terminating 0 is stored).
 * The layout is the one UTF8ToISO8859x() indexes through @xlattable:
 * header bytes 0..31 and 32..47 hold block numbers selected by the lead
 * byte, each block is indexed by the low 6 bits of a trailing byte, and a
 * 0 result means "not in character set".
 * NOTE(review): the call site passing this table is outside this view.
 */
2553static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2554 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2555 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2556 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2561 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2562 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2563 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2564 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2565 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2568 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2570 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2573 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2577 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2578 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2579 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2580 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2581 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2584 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2585};
2586
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order; many later slots hold 0x0000. ISO8859_8ToUTF8() hands this
 * table to ISO8859xToUTF8().
 * NOTE(review): presumably slot i is the Unicode code point of byte
 * 0x80 + i and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8(), which is outside this excerpt.
 */
2587static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2588 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2589 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2590 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2591 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2592 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2593 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2594 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2595 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2596 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2597 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2598 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2599 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2600 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2601 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2602 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2603 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2604};
2605
/*
 * xmltranscodetable_ISO8859_8:
 *
 * 48 + 7 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * UTF8ToISO8859_8() hands this table to UTF8ToISO8859x().
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte groups, with \x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x(), which is outside this excerpt.
 * NOTE(review): the first byte is \x02, not \x00, and the third 64-byte
 * group starts with \xff; this may be the generator filing the 0x0000
 * slots of xmlunicodetable_ISO8859_8 under code point 0 -- TODO confirm
 * that this is intended.
 */
2606static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2607 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2608 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2609 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2614 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2615 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2616 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2617 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2618 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2619 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2621 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2623 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2625 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2626 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2631 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2635 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2636 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2638};
2639
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order and no slot holds 0x0000. ISO8859_9ToUTF8() hands this table to
 * ISO8859xToUTF8().
 * NOTE(review): presumably slot i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8(), which is outside this
 * excerpt.
 */
2640static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2641 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2642 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2643 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2644 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2645 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2646 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2647 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2648 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2649 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2650 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2651 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2652 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2653 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2654 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2655 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2656 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2657};
2658
/*
 * xmltranscodetable_ISO8859_9:
 *
 * 48 + 5 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * UTF8ToISO8859_9() hands this table to UTF8ToISO8859x().
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * five 64-byte groups, with \x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x(), which is outside this excerpt.
 */
2659static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2660 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2663 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2665 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2666 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2667 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2668 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2669 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2670 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2671 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2672 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2673 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2674 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2675 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2683};
2684
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order and no slot holds 0x0000. ISO8859_10ToUTF8() hands this table
 * to ISO8859xToUTF8().
 * NOTE(review): presumably slot i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8(), which is outside this
 * excerpt.
 */
2685static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2686 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2687 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2688 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2689 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2690 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2691 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2692 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2693 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2694 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2695 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2696 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2697 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2698 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2699 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2700 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2701 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2702};
2703
/*
 * xmltranscodetable_ISO8859_10:
 *
 * 48 + 7 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * UTF8ToISO8859_10() hands this table to UTF8ToISO8859x().
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte groups, with \x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x(), which is outside this excerpt.
 */
2704static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2705 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2707 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2712 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2713 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2714 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2715 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2716 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2717 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2718 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2719 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2720 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2722 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2723 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2724 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2729 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2731 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2732 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2733 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2734 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2735 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2736};
2737
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order; eight later slots hold 0x0000. It is the table argument of the
 * ISO8859xToUTF8() call made by ISO8859_11ToUTF8().
 * NOTE(review): presumably slot i is the Unicode code point of byte
 * 0x80 + i and 0x0000 marks a byte with no mapping -- confirm against
 * ISO8859xToUTF8(), which is outside this excerpt.
 */
2738static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2739 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2740 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2741 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2742 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2743 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2744 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2745 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2746 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2747 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2748 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2749 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2750 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2751 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2752 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2753 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2754 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2755};
2756
/*
 * xmltranscodetable_ISO8859_11:
 *
 * 48 + 6 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably a UTF8ToISO8859_11() wrapper passes it to
 * UTF8ToISO8859x() like its siblings -- confirm.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte groups, with \x00 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x().
 * NOTE(review): the first byte is \x04, not \x00, and the fifth 64-byte
 * group starts with \xff; this may be the generator filing the 0x0000
 * slots of xmlunicodetable_ISO8859_11 under code point 0 -- TODO confirm
 * that this is intended.
 */
2757static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2758 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2761 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2763 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2764 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2765 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2766 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2767 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2768 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2769 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2770 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2771 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2772 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2773 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2774 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2775 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2776 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2777 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2781 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2782 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2784 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2785};
2786
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order and no slot holds 0x0000.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably an ISO8859_13ToUTF8() wrapper passes it to
 * ISO8859xToUTF8(), with slot i being the Unicode code point of byte
 * 0x80 + i -- confirm.
 */
2787static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2788 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2789 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2790 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2791 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2792 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2793 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2794 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2795 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2796 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2797 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2798 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2799 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2800 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2801 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2802 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2803 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2804};
2805
/*
 * xmltranscodetable_ISO8859_13:
 *
 * 48 + 7 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably a UTF8ToISO8859_13() wrapper passes it to
 * UTF8ToISO8859x() like its siblings -- confirm.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte groups, with \x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
2806static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2807 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2809 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2814 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2815 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2816 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2817 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2818 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2819 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2820 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2821 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2822 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2823 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2824 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2825 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2826 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2827 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2828 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2829 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2830 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2832 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2833 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2834 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2835 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2836 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2837 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2838};
2839
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order and no slot holds 0x0000.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably an ISO8859_14ToUTF8() wrapper passes it to
 * ISO8859xToUTF8(), with slot i being the Unicode code point of byte
 * 0x80 + i -- confirm.
 */
2840static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2841 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2842 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2843 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2844 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2845 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2846 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2847 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2848 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2849 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2850 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2851 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2852 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2853 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2854 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2855 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2856 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2857};
2858
/*
 * xmltranscodetable_ISO8859_14:
 *
 * 48 + 10 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably a UTF8ToISO8859_14() wrapper passes it to
 * UTF8ToISO8859x() like its siblings -- confirm.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte groups, with \x00 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x().
 */
2859static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2860 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2866 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2867 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2868 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2869 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2870 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2871 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2872 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2873 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2874 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
2875 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
2876 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2883 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2884 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2885 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2886 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2894 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
2895 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
2897 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2900 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
2901 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2902 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
2903};
2904
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order and no slot holds 0x0000.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably an ISO8859_15ToUTF8() wrapper passes it to
 * ISO8859xToUTF8(), with slot i being the Unicode code point of byte
 * 0x80 + i -- confirm.
 */
2905static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
2906 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2907 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2908 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2909 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2910 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
2911 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2912 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
2913 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
2914 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2915 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2916 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2917 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2918 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2919 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2920 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2921 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
2922};
2923
/*
 * xmltranscodetable_ISO8859_15:
 *
 * 48 + 6 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably a UTF8ToISO8859_15() wrapper passes it to
 * UTF8ToISO8859x() like its siblings -- confirm.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte groups, with \x00 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x().
 */
2924static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
2925 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2926 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2927 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2928 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2929 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2930 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2931 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2932 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2933 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2934 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
2935 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
2936 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
2943 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2945 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2946 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
2948 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2949 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2950 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2951 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
2952};
2953
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values, eight per row. The first 32 are 0x0080..0x009f in
 * order and no slot holds 0x0000.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably an ISO8859_16ToUTF8() wrapper passes it to
 * ISO8859xToUTF8(), with slot i being the Unicode code point of byte
 * 0x80 + i -- confirm.
 */
2954static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
2955 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2956 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2957 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2958 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2959 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
2960 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
2961 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
2962 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
2963 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
2964 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2965 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
2966 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
2967 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
2968 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2969 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
2970 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
2971};
2972
/*
 * xmltranscodetable_ISO8859_16:
 *
 * 48 + 9 * 64 bytes, written as adjacent string literals of 16 bytes
 * each. The literals add up to exactly the declared size, so the
 * implicit terminating NUL of the literal is not stored.
 * NOTE(review): no use of this table is visible in this excerpt;
 * presumably a UTF8ToISO8859_16() wrapper passes it to
 * UTF8ToISO8859x() like its siblings -- confirm.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte groups, with \x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
2973static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
2974 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2982 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2983 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
2984 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
2985 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
2986 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
2987 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2991 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
2993 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3003 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3004 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3005 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3010 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3011 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3012 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3013};
3014
3015
3016/*
3017 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3018 */
3019
3020static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3021 const unsigned char* in, int *inlen) {
3022 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3023}
3024static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3025 const unsigned char* in, int *inlen) {
3026 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3027}
3028
3029static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3030 const unsigned char* in, int *inlen) {
3031 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3032}
3033static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3034 const unsigned char* in, int *inlen) {
3035 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3036}
3037
3038static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3039 const unsigned char* in, int *inlen) {
3040 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3041}
3042static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3043 const unsigned char* in, int *inlen) {
3044 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3045}
3046
3047static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3048 const unsigned char* in, int *inlen) {
3049 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3050}
3051static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3052 const unsigned char* in, int *inlen) {
3053 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3054}
3055
3056static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3057 const unsigned char* in, int *inlen) {
3058 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3059}
3060static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3061 const unsigned char* in, int *inlen) {
3062 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3063}
3064
3065static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3066 const unsigned char* in, int *inlen) {
3067 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3068}
3069static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3070 const unsigned char* in, int *inlen) {
3071 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3072}
3073
3074static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3075 const unsigned char* in, int *inlen) {
3076 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3077}
3078static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3079 const unsigned char* in, int *inlen) {
3080 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3081}
3082
3083static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3084 const unsigned char* in, int *inlen) {
3085 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3086}
3087static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3088 const unsigned char* in, int *inlen) {
3089 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3090}
3091
3092static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3093 const unsigned char* in, int *inlen) {
3094 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3095}
3096static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3097 const unsigned char* in, int *inlen) {
3098 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3099}
3100
3101static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3102 const unsigned char* in, int *inlen) {
3103 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3104}
3105static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3106 const unsigned char* in, int *inlen) {
3107 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3108}
3109
3110static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3111 const unsigned char* in, int *inlen) {
3112 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3113}
3114static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3115 const unsigned char* in, int *inlen) {
3116 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3117}
3118
3119static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3120 const unsigned char* in, int *inlen) {
3121 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3122}
3123static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3124 const unsigned char* in, int *inlen) {
3125 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3126}
3127
3128static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3129 const unsigned char* in, int *inlen) {
3130 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3131}
3132static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3133 const unsigned char* in, int *inlen) {
3134 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3135}
3136
3137static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3138 const unsigned char* in, int *inlen) {
3139 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3140}
3141static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3142 const unsigned char* in, int *inlen) {
3143 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3144}
3145
3146static void
3147xmlRegisterCharEncodingHandlersISO8859x (void) {
3148 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3149 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3150 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3151 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3152 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3153 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3154 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3155 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3156 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3157 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3158 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3159 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3160 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3161 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3162}
3163
3164#endif
3165#endif
3166
3167