blob: bc8a435b04ab8c6385e4db13399b8977ac4cb5ac [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
Daniel Veillard97ac1312001-05-30 19:14:17 +000020 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000021 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * Encoding alias table.
 * Each entry pairs an alias (upper-cased by xmlAddEncodingAlias before
 * being stored) with the encoding name it stands for. Both strings are
 * copies made with xmlMemStrdup and released by xmlCleanupEncodingAliases.
 */
50typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52struct _xmlCharEncodingAlias {
53 const char *name;
54 const char *alias;
55};
56
/* The table itself, the number of entries in use and the allocated capacity. */
57static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58static int xmlCharEncodingAliasesNb = 0;
59static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
71static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every valid
 * input byte is copied unchanged.
 *
 * Conversion stops, without error, when the input is exhausted or when
 * @out is full.
 *
 * Returns 0 if success, or -1 if a byte outside the ASCII range (>= 0x80)
 *     is met.
 * In both cases the value of @inlen after return is the number of octets
 * consumed and the value of @outlen the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* base = in;
    const unsigned char* inend = in + *inlen;
    int ret = 0;

    /*
     * One input byte yields exactly one output byte, so the only space
     * requirement is a single free byte; no extra slack is needed.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            ret = -1;
            break;
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(ret);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is produced.
 * Conversion stops, without error, when the input is exhausted, when the
 * input ends in the middle of a multi-byte sequence or when @out is full.
 * Note that over-long encodings of ASCII characters are not rejected.
 *
 * Returns 0 if success, or -2 if the transcoding fails (malformed UTF-8
 *     or a character which has no ASCII representation).
 * In both cases the value of @inlen after return is the number of octets
 * consumed and the value of @outlen the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            ret = -2;
            break;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte: the sequence is malformed and
                 * must not be turned into output.
                 */
                ret = -2;
                goto done;
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            ret = -2;
            break;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(ret);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000214#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000215
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as-is, the others are
 * expanded to a two byte sequence.
 *
 * Conversion stops when the input is exhausted or when the next character
 * does not fit in the remaining output space.
 *
 * Returns 0 in all cases.
 * The value of @inlen after return is the number of octets consumed
 * and the value of @outlen the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    unsigned char *const dstend = out + *outlen;
    const unsigned char *src = in;
    const unsigned char *const srcend = in + *inlen;

    while (src < srcend) {
        unsigned int ch = *src;

        if (ch < 0x80) {
            /* plain ASCII, one byte needed */
            if (dst >= dstend)
                break;
            *dst++ = (unsigned char) ch;
        } else {
            /* upper half of Latin 1, two bytes needed */
            if (dstend - dst < 2)
                break;
            *dst++ = (unsigned char) (((ch >> 6) & 0x1F) | 0xC0);
            *dst++ = (unsigned char) ((ch & 0x3F) | 0x80);
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(0);
}
260
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in @out
 * are copied verbatim from @inb, without any validation.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL or if the
 *     number of bytes to copy is negative.
 * On success the values of *inlenb and *outlen after return are both the
 * number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy the smaller of what is available and what fits */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(0);
}
296
Daniel Veillarde72c7562002-05-31 09:47:30 +0000297
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000298#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is produced.
 * Conversion stops, without error, when the input is exhausted, when the
 * input ends in the middle of a multi-byte sequence or when @out is full.
 *
 * Returns 0 if success, or -2 if the transcoding fails (malformed UTF-8
 *     or a character above 0xFF).
 * In both cases the value of @inlen after return is the number of octets
 * consumed and the value of @outlen the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;        /* read cursor */
    const unsigned char *committed = in;  /* end of the last converted char */
    const unsigned char *srcend;
    unsigned char *dst = out;
    const unsigned char *dstend;
    int status = 0;

    if (in == NULL) {
        /* initialization call, nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int value;
        int extra;

        /* classify the lead byte */
        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* continuation byte where a lead byte is expected */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            value = lead & 0x07;
            extra = 3;
        } else {
            /* such a lead byte can never map to Latin 1 */
            status = -2;
            break;
        }

        /* the sequence is cut by the end of the input */
        if (srcend - src < extra)
            break;

        /* fold in the continuation bytes */
        while (extra > 0) {
            unsigned int next = *src++;

            if ((next & 0xC0) != 0x80) {
                status = -2;
                goto finish;
            }
            value = (value << 6) | (next & 0x3F);
            extra--;
        }

        if (value > 0xFF) {
            /* not representable in Latin 1 */
            status = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = (unsigned char) value;
        committed = src;
    }

finish:
    *outlen = dst - out;
    *inlen = committed - in;
    return(status);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000386#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000387
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the endianness of the machine nor on the alignment of @inb.
 *
 * Conversion stops, without error, when the input is exhausted, when a
 * surrogate pair is cut by the end of the input or when the next character
 * does not fit completely in the remaining output space. A trailing odd
 * byte of input is never consumed. An isolated low surrogate is not
 * detected and is encoded like any other 16 bit value.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 *     is not followed by a low surrogate).
 * In both cases the value of *inlenb after return is the number of octets
 * consumed and the value of *outlen the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int avail, needed, bits;

    /* only whole 16 bit units are considered */
    avail = *inlenb;
    if (avail < 0)
        avail = 0;
    inend = inb + (avail - (avail % 2));

    while (inend - in >= 2) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2) {
                /* pair split by the end of input: wait for more data */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
475
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000476#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is generated. The output is
 * written byte by byte, so the result does not depend on the endianness
 * of the machine nor on the alignment of @outb.
 *
 * A NULL @in is an initialization call: nothing is produced.
 * Conversion stops, without error, when the input is exhausted, when the
 * input ends in the middle of a multi-byte sequence or when the next
 * character does not fit in the remaining output space.
 *
 * Returns 0 if success, or -2 if the transcoding failed (malformed UTF-8
 *     or a value above 0x10FFFF).
 * In both cases the value of *inlen after return is the number of octets
 * consumed and the value of *outlen the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;
    int ret = 0;

    /* UTF16LE encoding has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be written */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* malformed sequence, do not emit a half decoded value */
                ret = -2;
                goto done;
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16 bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        } else {
            /* beyond the range UTF-16 can express */
            ret = -2;
            break;
        }
        processed = in;
    }
done:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(ret);
}
583
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * When @in is NULL this is the initialization call: the little endian
 * Byte Order Mark (0xFF 0xFE) is written if @outb has room for it.
 * Otherwise the conversion is delegated to UTF8ToUTF16LE.
 *
 * Returns 2 when the Byte Order Mark was written, 0 when it did not fit,
 *     else the value returned by UTF8ToUTF16LE.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* regular data: same as the BOM-less little endian encoder */
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
William M. Brack030a7a12004-02-10 12:48:57 +0000622#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000623
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the endianness of the machine nor on the alignment of @inb.
 *
 * Conversion stops, without error, when the input is exhausted, when a
 * surrogate pair is cut by the end of the input or when the next character
 * does not fit completely in the remaining output space. A trailing odd
 * byte of input is never consumed. An isolated low surrogate is not
 * detected and is encoded like any other 16 bit value.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 *     is not followed by a low surrogate).
 * In both cases the value of *inlenb after return is the number of octets
 * consumed and the value of *outlen the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int avail, needed, bits;

    /* only whole 16 bit units are considered */
    avail = *inlenb;
    if (avail < 0)
        avail = 0;
    inend = inb + (avail - (avail % 2));

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2) {
                /* pair split by the end of input: wait for more data */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /*
         * Never emit a partial UTF-8 sequence: if the character does not
         * fit it is left unconsumed for the next call.
         */
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
715
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000716#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is generated. The output is
 * written byte by byte, so the result does not depend on the endianness
 * of the machine nor on the alignment of @outb.
 *
 * A NULL @in is an initialization call: nothing is produced.
 * Conversion stops, without error, when the input is exhausted, when the
 * input ends in the middle of a multi-byte sequence or when the next
 * character does not fit in the remaining output space.
 *
 * Returns 0 if success, or -2 if the transcoding failed (malformed UTF-8
 *     or a value above 0x10FFFF).
 * In both cases the value of *inlen after return is the number of octets
 * consumed and the value of *outlen the number of octets produced.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;
    int ret = 0;

    /* UTF-16BE has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be written */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* malformed sequence, do not emit a half decoded value */
                ret = -2;
                goto done;
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16 bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        } else {
            /* beyond the range UTF-16 can express */
            ret = -2;
            break;
        }
        processed = in;
    }
done:
    /* out is a byte pointer, so this is a byte count on every path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(ret);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000820#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000821
Daniel Veillard97ac1312001-05-30 19:14:17 +0000822/************************************************************************
823 * *
824 * Generic encoding handling routines *
825 * *
826 ************************************************************************/
827
Owen Taylor3473f882001-02-23 17:55:21 +0000828/**
829 * xmlDetectCharEncoding:
830 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000831 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000832 * @len: pointer to the length of the buffer
833 *
834 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000835 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000836 *
837 * Returns one of the XML_CHAR_ENCODING_... values.
838 */
839xmlCharEncoding
840xmlDetectCharEncoding(const unsigned char* in, int len)
841{
842 if (len >= 4) {
843 if ((in[0] == 0x00) && (in[1] == 0x00) &&
844 (in[2] == 0x00) && (in[3] == 0x3C))
845 return(XML_CHAR_ENCODING_UCS4BE);
846 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
847 (in[2] == 0x00) && (in[3] == 0x00))
848 return(XML_CHAR_ENCODING_UCS4LE);
849 if ((in[0] == 0x00) && (in[1] == 0x00) &&
850 (in[2] == 0x3C) && (in[3] == 0x00))
851 return(XML_CHAR_ENCODING_UCS4_2143);
852 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
853 (in[2] == 0x00) && (in[3] == 0x00))
854 return(XML_CHAR_ENCODING_UCS4_3412);
855 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
856 (in[2] == 0xA7) && (in[3] == 0x94))
857 return(XML_CHAR_ENCODING_EBCDIC);
858 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
859 (in[2] == 0x78) && (in[3] == 0x6D))
860 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000861 /*
862 * Although not part of the recommendation, we also
863 * attempt an "auto-recognition" of UTF-16LE and
864 * UTF-16BE encodings.
865 */
866 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
867 (in[2] == 0x3F) && (in[3] == 0x00))
868 return(XML_CHAR_ENCODING_UTF16LE);
869 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
870 (in[2] == 0x00) && (in[3] == 0x3F))
871 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000872 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000873 if (len >= 3) {
874 /*
875 * Errata on XML-1.0 June 20 2001
876 * We now allow an UTF8 encoded BOM
877 */
878 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
879 (in[2] == 0xBF))
880 return(XML_CHAR_ENCODING_UTF8);
881 }
William M. Brackf9415e42003-11-28 09:39:10 +0000882 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000883 if (len >= 2) {
884 if ((in[0] == 0xFE) && (in[1] == 0xFF))
885 return(XML_CHAR_ENCODING_UTF16BE);
886 if ((in[0] == 0xFF) && (in[1] == 0xFE))
887 return(XML_CHAR_ENCODING_UTF16LE);
888 }
889 return(XML_CHAR_ENCODING_NONE);
890}
891
892/**
893 * xmlCleanupEncodingAliases:
894 *
895 * Unregisters all aliases
896 */
897void
898xmlCleanupEncodingAliases(void) {
899 int i;
900
901 if (xmlCharEncodingAliases == NULL)
902 return;
903
904 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
905 if (xmlCharEncodingAliases[i].name != NULL)
906 xmlFree((char *) xmlCharEncodingAliases[i].name);
907 if (xmlCharEncodingAliases[i].alias != NULL)
908 xmlFree((char *) xmlCharEncodingAliases[i].alias);
909 }
910 xmlCharEncodingAliasesNb = 0;
911 xmlCharEncodingAliasesMax = 0;
912 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000913 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000914}
915
916/**
917 * xmlGetEncodingAlias:
918 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
919 *
920 * Lookup an encoding name for the given alias.
921 *
William M. Brackf9415e42003-11-28 09:39:10 +0000922 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000923 */
924const char *
925xmlGetEncodingAlias(const char *alias) {
926 int i;
927 char upper[100];
928
929 if (alias == NULL)
930 return(NULL);
931
932 if (xmlCharEncodingAliases == NULL)
933 return(NULL);
934
935 for (i = 0;i < 99;i++) {
936 upper[i] = toupper(alias[i]);
937 if (upper[i] == 0) break;
938 }
939 upper[i] = 0;
940
941 /*
942 * Walk down the list looking for a definition of the alias
943 */
944 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
945 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
946 return(xmlCharEncodingAliases[i].name);
947 }
948 }
949 return(NULL);
950}
951
952/**
953 * xmlAddEncodingAlias:
954 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
955 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
956 *
William M. Brackf9415e42003-11-28 09:39:10 +0000957 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000958 * will be overwritten.
959 *
960 * Returns 0 in case of success, -1 in case of error
961 */
962int
963xmlAddEncodingAlias(const char *name, const char *alias) {
964 int i;
965 char upper[100];
966
967 if ((name == NULL) || (alias == NULL))
968 return(-1);
969
970 for (i = 0;i < 99;i++) {
971 upper[i] = toupper(alias[i]);
972 if (upper[i] == 0) break;
973 }
974 upper[i] = 0;
975
976 if (xmlCharEncodingAliases == NULL) {
977 xmlCharEncodingAliasesNb = 0;
978 xmlCharEncodingAliasesMax = 20;
979 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
980 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
981 if (xmlCharEncodingAliases == NULL)
982 return(-1);
983 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
984 xmlCharEncodingAliasesMax *= 2;
985 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
986 xmlRealloc(xmlCharEncodingAliases,
987 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
988 }
989 /*
990 * Walk down the list looking for a definition of the alias
991 */
992 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
993 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
994 /*
995 * Replace the definition.
996 */
997 xmlFree((char *) xmlCharEncodingAliases[i].name);
998 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
999 return(0);
1000 }
1001 }
1002 /*
1003 * Add the definition
1004 */
1005 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1006 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1007 xmlCharEncodingAliasesNb++;
1008 return(0);
1009}
1010
1011/**
1012 * xmlDelEncodingAlias:
1013 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1014 *
1015 * Unregisters an encoding alias @alias
1016 *
1017 * Returns 0 in case of success, -1 in case of error
1018 */
1019int
1020xmlDelEncodingAlias(const char *alias) {
1021 int i;
1022
1023 if (alias == NULL)
1024 return(-1);
1025
1026 if (xmlCharEncodingAliases == NULL)
1027 return(-1);
1028 /*
1029 * Walk down the list looking for a definition of the alias
1030 */
1031 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1032 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1033 xmlFree((char *) xmlCharEncodingAliases[i].name);
1034 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1035 xmlCharEncodingAliasesNb--;
1036 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1037 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1038 return(0);
1039 }
1040 }
1041 return(-1);
1042}
1043
1044/**
1045 * xmlParseCharEncoding:
1046 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1047 *
William M. Brackf9415e42003-11-28 09:39:10 +00001048 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001049 * that the comparison is case insensitive accordingly to the section
1050 * [XML] 4.3.3 Character Encoding in Entities.
1051 *
1052 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1053 * if not recognized.
1054 */
1055xmlCharEncoding
1056xmlParseCharEncoding(const char* name)
1057{
1058 const char *alias;
1059 char upper[500];
1060 int i;
1061
1062 if (name == NULL)
1063 return(XML_CHAR_ENCODING_NONE);
1064
1065 /*
1066 * Do the alias resolution
1067 */
1068 alias = xmlGetEncodingAlias(name);
1069 if (alias != NULL)
1070 name = alias;
1071
1072 for (i = 0;i < 499;i++) {
1073 upper[i] = toupper(name[i]);
1074 if (upper[i] == 0) break;
1075 }
1076 upper[i] = 0;
1077
1078 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1079 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1080 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1081
1082 /*
1083 * NOTE: if we were able to parse this, the endianness of UTF16 is
1084 * already found and in use
1085 */
1086 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1087 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1088
1089 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1090 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1091 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1092
1093 /*
1094 * NOTE: if we were able to parse this, the endianness of UCS4 is
1095 * already found and in use
1096 */
1097 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1098 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1099 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1100
1101
1102 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1103 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1104 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1105
1106 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1107 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1108 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1109
1110 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1111 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1112 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1113 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1114 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1115 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1116 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1117
1118 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1119 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1120 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1121
1122#ifdef DEBUG_ENCODING
1123 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1124#endif
1125 return(XML_CHAR_ENCODING_ERROR);
1126}
1127
1128/**
1129 * xmlGetCharEncodingName:
1130 * @enc: the encoding
1131 *
1132 * The "canonical" name for XML encoding.
1133 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1134 * Section 4.3.3 Character Encoding in Entities
1135 *
1136 * Returns the canonical name for the given encoding
1137 */
1138
1139const char*
1140xmlGetCharEncodingName(xmlCharEncoding enc) {
1141 switch (enc) {
1142 case XML_CHAR_ENCODING_ERROR:
1143 return(NULL);
1144 case XML_CHAR_ENCODING_NONE:
1145 return(NULL);
1146 case XML_CHAR_ENCODING_UTF8:
1147 return("UTF-8");
1148 case XML_CHAR_ENCODING_UTF16LE:
1149 return("UTF-16");
1150 case XML_CHAR_ENCODING_UTF16BE:
1151 return("UTF-16");
1152 case XML_CHAR_ENCODING_EBCDIC:
1153 return("EBCDIC");
1154 case XML_CHAR_ENCODING_UCS4LE:
1155 return("ISO-10646-UCS-4");
1156 case XML_CHAR_ENCODING_UCS4BE:
1157 return("ISO-10646-UCS-4");
1158 case XML_CHAR_ENCODING_UCS4_2143:
1159 return("ISO-10646-UCS-4");
1160 case XML_CHAR_ENCODING_UCS4_3412:
1161 return("ISO-10646-UCS-4");
1162 case XML_CHAR_ENCODING_UCS2:
1163 return("ISO-10646-UCS-2");
1164 case XML_CHAR_ENCODING_8859_1:
1165 return("ISO-8859-1");
1166 case XML_CHAR_ENCODING_8859_2:
1167 return("ISO-8859-2");
1168 case XML_CHAR_ENCODING_8859_3:
1169 return("ISO-8859-3");
1170 case XML_CHAR_ENCODING_8859_4:
1171 return("ISO-8859-4");
1172 case XML_CHAR_ENCODING_8859_5:
1173 return("ISO-8859-5");
1174 case XML_CHAR_ENCODING_8859_6:
1175 return("ISO-8859-6");
1176 case XML_CHAR_ENCODING_8859_7:
1177 return("ISO-8859-7");
1178 case XML_CHAR_ENCODING_8859_8:
1179 return("ISO-8859-8");
1180 case XML_CHAR_ENCODING_8859_9:
1181 return("ISO-8859-9");
1182 case XML_CHAR_ENCODING_2022_JP:
1183 return("ISO-2022-JP");
1184 case XML_CHAR_ENCODING_SHIFT_JIS:
1185 return("Shift-JIS");
1186 case XML_CHAR_ENCODING_EUC_JP:
1187 return("EUC-JP");
1188 case XML_CHAR_ENCODING_ASCII:
1189 return(NULL);
1190 }
1191 return(NULL);
1192}
1193
Daniel Veillard97ac1312001-05-30 19:14:17 +00001194/************************************************************************
1195 * *
1196 * Char encoding handlers *
1197 * *
1198 ************************************************************************/
1199
Owen Taylor3473f882001-02-23 17:55:21 +00001200
/*
 * Registry of char encoding handlers.
 *
 * the size should be growable, but it's not a big deal ...
 * The table is allocated with MAX_ENCODING_HANDLERS slots by
 * xmlInitCharEncodingHandlers(); xmlRegisterCharEncodingHandler()
 * refuses further registrations once that many handlers are stored.
 * nbCharEncodingHandler is the number of slots currently in use.
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns when it is
 * given a NULL or empty name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1212
1213/**
1214 * xmlNewCharEncodingHandler:
1215 * @name: the encoding name, in UTF-8 format (ASCII actually)
1216 * @input: the xmlCharEncodingInputFunc to read that encoding
1217 * @output: the xmlCharEncodingOutputFunc to write that encoding
1218 *
1219 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001220 *
Owen Taylor3473f882001-02-23 17:55:21 +00001221 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1222 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001223xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001224xmlNewCharEncodingHandler(const char *name,
1225 xmlCharEncodingInputFunc input,
1226 xmlCharEncodingOutputFunc output) {
1227 xmlCharEncodingHandlerPtr handler;
1228 const char *alias;
1229 char upper[500];
1230 int i;
1231 char *up = 0;
1232
1233 /*
1234 * Do the alias resolution
1235 */
1236 alias = xmlGetEncodingAlias(name);
1237 if (alias != NULL)
1238 name = alias;
1239
1240 /*
1241 * Keep only the uppercase version of the encoding.
1242 */
1243 if (name == NULL) {
1244 xmlGenericError(xmlGenericErrorContext,
1245 "xmlNewCharEncodingHandler : no name !\n");
1246 return(NULL);
1247 }
1248 for (i = 0;i < 499;i++) {
1249 upper[i] = toupper(name[i]);
1250 if (upper[i] == 0) break;
1251 }
1252 upper[i] = 0;
1253 up = xmlMemStrdup(upper);
1254 if (up == NULL) {
1255 xmlGenericError(xmlGenericErrorContext,
1256 "xmlNewCharEncodingHandler : out of memory !\n");
1257 return(NULL);
1258 }
1259
1260 /*
1261 * allocate and fill-up an handler block.
1262 */
1263 handler = (xmlCharEncodingHandlerPtr)
1264 xmlMalloc(sizeof(xmlCharEncodingHandler));
1265 if (handler == NULL) {
1266 xmlGenericError(xmlGenericErrorContext,
1267 "xmlNewCharEncodingHandler : out of memory !\n");
1268 return(NULL);
1269 }
1270 handler->input = input;
1271 handler->output = output;
1272 handler->name = up;
1273
1274#ifdef LIBXML_ICONV_ENABLED
1275 handler->iconv_in = NULL;
1276 handler->iconv_out = NULL;
1277#endif /* LIBXML_ICONV_ENABLED */
1278
1279 /*
1280 * registers and returns the handler.
1281 */
1282 xmlRegisterCharEncodingHandler(handler);
1283#ifdef DEBUG_ENCODING
1284 xmlGenericError(xmlGenericErrorContext,
1285 "Registered encoding handler for %s\n", name);
1286#endif
1287 return(handler);
1288}
1289
1290/**
1291 * xmlInitCharEncodingHandlers:
1292 *
1293 * Initialize the char encoding support, it registers the default
1294 * encoding supported.
1295 * NOTE: while public, this function usually doesn't need to be called
1296 * in normal processing.
1297 */
1298void
1299xmlInitCharEncodingHandlers(void) {
1300 unsigned short int tst = 0x1234;
1301 unsigned char *ptr = (unsigned char *) &tst;
1302
1303 if (handlers != NULL) return;
1304
1305 handlers = (xmlCharEncodingHandlerPtr *)
1306 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1307
1308 if (*ptr == 0x12) xmlLittleEndian = 0;
1309 else if (*ptr == 0x34) xmlLittleEndian = 1;
1310 else xmlGenericError(xmlGenericErrorContext,
1311 "Odd problem at endianness detection\n");
1312
1313 if (handlers == NULL) {
1314 xmlGenericError(xmlGenericErrorContext,
1315 "xmlInitCharEncodingHandlers : out of memory !\n");
1316 return;
1317 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001318 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001319#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001320 xmlUTF16LEHandler =
1321 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1322 xmlUTF16BEHandler =
1323 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001324 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001325 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1326 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001327 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001328#ifdef LIBXML_HTML_ENABLED
1329 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1330#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001331#else
1332 xmlUTF16LEHandler =
1333 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1334 xmlUTF16BEHandler =
1335 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001336 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001337 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1338 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1339 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1340#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001341#ifndef LIBXML_ICONV_ENABLED
1342#ifdef LIBXML_ISO8859X_ENABLED
1343 xmlRegisterCharEncodingHandlersISO8859x ();
1344#endif
1345#endif
1346
Owen Taylor3473f882001-02-23 17:55:21 +00001347}
1348
1349/**
1350 * xmlCleanupCharEncodingHandlers:
1351 *
1352 * Cleanup the memory allocated for the char encoding support, it
1353 * unregisters all the encoding handlers and the aliases.
1354 */
1355void
1356xmlCleanupCharEncodingHandlers(void) {
1357 xmlCleanupEncodingAliases();
1358
1359 if (handlers == NULL) return;
1360
1361 for (;nbCharEncodingHandler > 0;) {
1362 nbCharEncodingHandler--;
1363 if (handlers[nbCharEncodingHandler] != NULL) {
1364 if (handlers[nbCharEncodingHandler]->name != NULL)
1365 xmlFree(handlers[nbCharEncodingHandler]->name);
1366 xmlFree(handlers[nbCharEncodingHandler]);
1367 }
1368 }
1369 xmlFree(handlers);
1370 handlers = NULL;
1371 nbCharEncodingHandler = 0;
1372 xmlDefaultCharEncodingHandler = NULL;
1373}
1374
1375/**
1376 * xmlRegisterCharEncodingHandler:
1377 * @handler: the xmlCharEncodingHandlerPtr handler block
1378 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001379 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001380 */
1381void
1382xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1383 if (handlers == NULL) xmlInitCharEncodingHandlers();
1384 if (handler == NULL) {
1385 xmlGenericError(xmlGenericErrorContext,
1386 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1387 return;
1388 }
1389
1390 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1391 xmlGenericError(xmlGenericErrorContext,
1392 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1393 xmlGenericError(xmlGenericErrorContext,
1394 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1395 return;
1396 }
1397 handlers[nbCharEncodingHandler++] = handler;
1398}
1399
1400/**
1401 * xmlGetCharEncodingHandler:
1402 * @enc: an xmlCharEncoding value.
1403 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001404 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001405 *
1406 * Returns the handler or NULL if not found
1407 */
1408xmlCharEncodingHandlerPtr
1409xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1410 xmlCharEncodingHandlerPtr handler;
1411
1412 if (handlers == NULL) xmlInitCharEncodingHandlers();
1413 switch (enc) {
1414 case XML_CHAR_ENCODING_ERROR:
1415 return(NULL);
1416 case XML_CHAR_ENCODING_NONE:
1417 return(NULL);
1418 case XML_CHAR_ENCODING_UTF8:
1419 return(NULL);
1420 case XML_CHAR_ENCODING_UTF16LE:
1421 return(xmlUTF16LEHandler);
1422 case XML_CHAR_ENCODING_UTF16BE:
1423 return(xmlUTF16BEHandler);
1424 case XML_CHAR_ENCODING_EBCDIC:
1425 handler = xmlFindCharEncodingHandler("EBCDIC");
1426 if (handler != NULL) return(handler);
1427 handler = xmlFindCharEncodingHandler("ebcdic");
1428 if (handler != NULL) return(handler);
1429 break;
1430 case XML_CHAR_ENCODING_UCS4BE:
1431 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1432 if (handler != NULL) return(handler);
1433 handler = xmlFindCharEncodingHandler("UCS-4");
1434 if (handler != NULL) return(handler);
1435 handler = xmlFindCharEncodingHandler("UCS4");
1436 if (handler != NULL) return(handler);
1437 break;
1438 case XML_CHAR_ENCODING_UCS4LE:
1439 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1440 if (handler != NULL) return(handler);
1441 handler = xmlFindCharEncodingHandler("UCS-4");
1442 if (handler != NULL) return(handler);
1443 handler = xmlFindCharEncodingHandler("UCS4");
1444 if (handler != NULL) return(handler);
1445 break;
1446 case XML_CHAR_ENCODING_UCS4_2143:
1447 break;
1448 case XML_CHAR_ENCODING_UCS4_3412:
1449 break;
1450 case XML_CHAR_ENCODING_UCS2:
1451 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1452 if (handler != NULL) return(handler);
1453 handler = xmlFindCharEncodingHandler("UCS-2");
1454 if (handler != NULL) return(handler);
1455 handler = xmlFindCharEncodingHandler("UCS2");
1456 if (handler != NULL) return(handler);
1457 break;
1458
1459 /*
1460 * We used to keep ISO Latin encodings native in the
1461 * generated data. This led to so many problems that
1462 * this has been removed. One can still change this
1463 * back by registering no-ops encoders for those
1464 */
1465 case XML_CHAR_ENCODING_8859_1:
1466 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1467 if (handler != NULL) return(handler);
1468 break;
1469 case XML_CHAR_ENCODING_8859_2:
1470 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1471 if (handler != NULL) return(handler);
1472 break;
1473 case XML_CHAR_ENCODING_8859_3:
1474 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1475 if (handler != NULL) return(handler);
1476 break;
1477 case XML_CHAR_ENCODING_8859_4:
1478 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1479 if (handler != NULL) return(handler);
1480 break;
1481 case XML_CHAR_ENCODING_8859_5:
1482 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1483 if (handler != NULL) return(handler);
1484 break;
1485 case XML_CHAR_ENCODING_8859_6:
1486 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1487 if (handler != NULL) return(handler);
1488 break;
1489 case XML_CHAR_ENCODING_8859_7:
1490 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1491 if (handler != NULL) return(handler);
1492 break;
1493 case XML_CHAR_ENCODING_8859_8:
1494 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1495 if (handler != NULL) return(handler);
1496 break;
1497 case XML_CHAR_ENCODING_8859_9:
1498 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1499 if (handler != NULL) return(handler);
1500 break;
1501
1502
1503 case XML_CHAR_ENCODING_2022_JP:
1504 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1505 if (handler != NULL) return(handler);
1506 break;
1507 case XML_CHAR_ENCODING_SHIFT_JIS:
1508 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1509 if (handler != NULL) return(handler);
1510 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1511 if (handler != NULL) return(handler);
1512 handler = xmlFindCharEncodingHandler("Shift_JIS");
1513 if (handler != NULL) return(handler);
1514 break;
1515 case XML_CHAR_ENCODING_EUC_JP:
1516 handler = xmlFindCharEncodingHandler("EUC-JP");
1517 if (handler != NULL) return(handler);
1518 break;
1519 default:
1520 break;
1521 }
1522
1523#ifdef DEBUG_ENCODING
1524 xmlGenericError(xmlGenericErrorContext,
1525 "No handler found for encoding %d\n", enc);
1526#endif
1527 return(NULL);
1528}
1529
1530/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 * xmlFindCharEncodingHandler:
1532 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001533 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001534 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001535 *
1536 * Returns the handler or NULL if not found
1537 */
1538xmlCharEncodingHandlerPtr
1539xmlFindCharEncodingHandler(const char *name) {
1540 const char *nalias;
1541 const char *norig;
1542 xmlCharEncoding alias;
1543#ifdef LIBXML_ICONV_ENABLED
1544 xmlCharEncodingHandlerPtr enc;
1545 iconv_t icv_in, icv_out;
1546#endif /* LIBXML_ICONV_ENABLED */
1547 char upper[100];
1548 int i;
1549
1550 if (handlers == NULL) xmlInitCharEncodingHandlers();
1551 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1552 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1553
1554 /*
1555 * Do the alias resolution
1556 */
1557 norig = name;
1558 nalias = xmlGetEncodingAlias(name);
1559 if (nalias != NULL)
1560 name = nalias;
1561
1562 /*
1563 * Check first for directly registered encoding names
1564 */
1565 for (i = 0;i < 99;i++) {
1566 upper[i] = toupper(name[i]);
1567 if (upper[i] == 0) break;
1568 }
1569 upper[i] = 0;
1570
1571 for (i = 0;i < nbCharEncodingHandler; i++)
1572 if (!strcmp(upper, handlers[i]->name)) {
1573#ifdef DEBUG_ENCODING
1574 xmlGenericError(xmlGenericErrorContext,
1575 "Found registered handler for encoding %s\n", name);
1576#endif
1577 return(handlers[i]);
1578 }
1579
1580#ifdef LIBXML_ICONV_ENABLED
1581 /* check whether iconv can handle this */
1582 icv_in = iconv_open("UTF-8", name);
1583 icv_out = iconv_open(name, "UTF-8");
1584 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1585 enc = (xmlCharEncodingHandlerPtr)
1586 xmlMalloc(sizeof(xmlCharEncodingHandler));
1587 if (enc == NULL) {
1588 iconv_close(icv_in);
1589 iconv_close(icv_out);
1590 return(NULL);
1591 }
1592 enc->name = xmlMemStrdup(name);
1593 enc->input = NULL;
1594 enc->output = NULL;
1595 enc->iconv_in = icv_in;
1596 enc->iconv_out = icv_out;
1597#ifdef DEBUG_ENCODING
1598 xmlGenericError(xmlGenericErrorContext,
1599 "Found iconv handler for encoding %s\n", name);
1600#endif
1601 return enc;
1602 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1603 xmlGenericError(xmlGenericErrorContext,
1604 "iconv : problems with filters for '%s'\n", name);
1605 }
1606#endif /* LIBXML_ICONV_ENABLED */
1607
1608#ifdef DEBUG_ENCODING
1609 xmlGenericError(xmlGenericErrorContext,
1610 "No handler found for encoding %s\n", name);
1611#endif
1612
1613 /*
1614 * Fallback using the canonical names
1615 */
1616 alias = xmlParseCharEncoding(norig);
1617 if (alias != XML_CHAR_ENCODING_ERROR) {
1618 const char* canon;
1619 canon = xmlGetCharEncodingName(alias);
1620 if ((canon != NULL) && (strcmp(name, canon))) {
1621 return(xmlFindCharEncodingHandler(canon));
1622 }
1623 }
1624
William M. Brackf9415e42003-11-28 09:39:10 +00001625 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001626 return(NULL);
1627}
1628
Daniel Veillard97ac1312001-05-30 19:14:17 +00001629/************************************************************************
1630 * *
1631 * ICONV based generic conversion functions *
1632 * *
1633 ************************************************************************/
1634
Owen Taylor3473f882001-02-23 17:55:21 +00001635#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes, in the source encoding
 *       of @cd (may be NULL)
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step and translates its outcome.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are set to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    size_t icv_inlen = *inlen, icv_outlen = *outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    /* iconv() returns a size_t and signals failure with (size_t) -1 */
    size_t ret;

    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    if (in != NULL) {
        /* iconv left the remaining byte counts; turn them into used counts */
        *inlen -= (int) icv_inlen;
        *outlen -= (int) icv_outlen;
    } else {
        *inlen = 0;
        *outlen = 0;
    }
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
1694#endif /* LIBXML_ICONV_ENABLED */
1695
Daniel Veillard97ac1312001-05-30 19:14:17 +00001696/************************************************************************
1697 * *
1698 * The real API used by libxml for on-the-fly conversion *
1699 * *
1700 ************************************************************************/
1701
Owen Taylor3473f882001-02-23 17:55:21 +00001702/**
1703 * xmlCharEncFirstLine:
1704 * @handler: char enconding transformation data structure
1705 * @out: an xmlBuffer for the output.
1706 * @in: an xmlBuffer for the input
1707 *
1708 * Front-end for the encoding handler input function, but handle only
1709 * the very first line, i.e. limit itself to 45 chars.
1710 *
1711 * Returns the number of byte written if success, or
1712 * -1 general error
1713 * -2 if the transcoding fails (for *in is not valid utf8 string or
1714 * the result of transformation can't fit into the encoding we want), or
1715 */
1716int
1717xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1718 xmlBufferPtr in) {
1719 int ret = -2;
1720 int written;
1721 int toconv;
1722
1723 if (handler == NULL) return(-1);
1724 if (out == NULL) return(-1);
1725 if (in == NULL) return(-1);
1726
1727 written = out->size - out->use;
1728 toconv = in->use;
1729 if (toconv * 2 >= written) {
1730 xmlBufferGrow(out, toconv);
1731 written = out->size - out->use - 1;
1732 }
1733
1734 /*
1735 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1736 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001737 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001738 */
1739 written = 45;
1740
1741 if (handler->input != NULL) {
1742 ret = handler->input(&out->content[out->use], &written,
1743 in->content, &toconv);
1744 xmlBufferShrink(in, toconv);
1745 out->use += written;
1746 out->content[out->use] = 0;
1747 }
1748#ifdef LIBXML_ICONV_ENABLED
1749 else if (handler->iconv_in != NULL) {
1750 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1751 &written, in->content, &toconv);
1752 xmlBufferShrink(in, toconv);
1753 out->use += written;
1754 out->content[out->use] = 0;
1755 if (ret == -1) ret = -3;
1756 }
1757#endif /* LIBXML_ICONV_ENABLED */
1758#ifdef DEBUG_ENCODING
1759 switch (ret) {
1760 case 0:
1761 xmlGenericError(xmlGenericErrorContext,
1762 "converted %d bytes to %d bytes of input\n",
1763 toconv, written);
1764 break;
1765 case -1:
1766 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1767 toconv, written, in->use);
1768 break;
1769 case -2:
1770 xmlGenericError(xmlGenericErrorContext,
1771 "input conversion failed due to input error\n");
1772 break;
1773 case -3:
1774 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1775 toconv, written, in->use);
1776 break;
1777 default:
1778 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1779 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001780#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001781 /*
1782 * Ignore when input buffer is not on a boundary
1783 */
1784 if (ret == -3) ret = 0;
1785 if (ret == -1) ret = 0;
1786 return(ret);
1787}
1788
1789/**
1790 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001791 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001792 * @out: an xmlBuffer for the output.
1793 * @in: an xmlBuffer for the input
1794 *
1795 * Generic front-end for the encoding handler input function
1796 *
1797 * Returns the number of byte written if success, or
1798 * -1 general error
1799 * -2 if the transcoding fails (for *in is not valid utf8 string or
1800 * the result of transformation can't fit into the encoding we want), or
1801 */
1802int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001803xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1804 xmlBufferPtr in)
1805{
Owen Taylor3473f882001-02-23 17:55:21 +00001806 int ret = -2;
1807 int written;
1808 int toconv;
1809
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001810 if (handler == NULL)
1811 return (-1);
1812 if (out == NULL)
1813 return (-1);
1814 if (in == NULL)
1815 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001816
1817 toconv = in->use;
1818 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001819 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001820 written = out->size - out->use;
1821 if (toconv * 2 >= written) {
1822 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001823 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001824 }
1825 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001826 ret = handler->input(&out->content[out->use], &written,
1827 in->content, &toconv);
1828 xmlBufferShrink(in, toconv);
1829 out->use += written;
1830 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001831 }
1832#ifdef LIBXML_ICONV_ENABLED
1833 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001834 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1835 &written, in->content, &toconv);
1836 xmlBufferShrink(in, toconv);
1837 out->use += written;
1838 out->content[out->use] = 0;
1839 if (ret == -1)
1840 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001841 }
1842#endif /* LIBXML_ICONV_ENABLED */
1843 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001844 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001845#ifdef DEBUG_ENCODING
1846 xmlGenericError(xmlGenericErrorContext,
1847 "converted %d bytes to %d bytes of input\n",
1848 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001849#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001850 break;
1851 case -1:
1852#ifdef DEBUG_ENCODING
1853 xmlGenericError(xmlGenericErrorContext,
1854 "converted %d bytes to %d bytes of input, %d left\n",
1855 toconv, written, in->use);
1856#endif
1857 break;
1858 case -3:
1859#ifdef DEBUG_ENCODING
1860 xmlGenericError(xmlGenericErrorContext,
1861 "converted %d bytes to %d bytes of input, %d left\n",
1862 toconv, written, in->use);
1863#endif
1864 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001865 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001866 xmlGenericError(xmlGenericErrorContext,
1867 "input conversion failed due to input error\n");
1868 xmlGenericError(xmlGenericErrorContext,
1869 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1870 in->content[0], in->content[1],
1871 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001872 }
1873 /*
1874 * Ignore when input buffer is not on a boundary
1875 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001876 if (ret == -3)
1877 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001878 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001879}
1880
1881/**
1882 * xmlCharEncOutFunc:
1883 * @handler: char enconding transformation data structure
1884 * @out: an xmlBuffer for the output.
1885 * @in: an xmlBuffer for the input
1886 *
1887 * Generic front-end for the encoding handler output function
1888 * a first call with @in == NULL has to be made firs to initiate the
1889 * output in case of non-stateless encoding needing to initiate their
1890 * state or the output (like the BOM in UTF16).
1891 * In case of UTF8 sequence conversion errors for the given encoder,
1892 * the content will be automatically remapped to a CharRef sequence.
1893 *
1894 * Returns the number of byte written if success, or
1895 * -1 general error
1896 * -2 if the transcoding fails (for *in is not valid utf8 string or
1897 * the result of transformation can't fit into the encoding we want), or
1898 */
1899int
1900xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1901 xmlBufferPtr in) {
1902 int ret = -2;
1903 int written;
1904 int writtentot = 0;
1905 int toconv;
1906 int output = 0;
1907
1908 if (handler == NULL) return(-1);
1909 if (out == NULL) return(-1);
1910
1911retry:
1912
1913 written = out->size - out->use;
1914
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001915 if (written > 0)
1916 written--; /* Gennady: count '/0' */
1917
Owen Taylor3473f882001-02-23 17:55:21 +00001918 /*
1919 * First specific handling of in = NULL, i.e. the initialization call
1920 */
1921 if (in == NULL) {
1922 toconv = 0;
1923 if (handler->output != NULL) {
1924 ret = handler->output(&out->content[out->use], &written,
1925 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001926 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001927 out->use += written;
1928 out->content[out->use] = 0;
1929 }
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931#ifdef LIBXML_ICONV_ENABLED
1932 else if (handler->iconv_out != NULL) {
1933 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1934 &written, NULL, &toconv);
1935 out->use += written;
1936 out->content[out->use] = 0;
1937 }
1938#endif /* LIBXML_ICONV_ENABLED */
1939#ifdef DEBUG_ENCODING
1940 xmlGenericError(xmlGenericErrorContext,
1941 "initialized encoder\n");
1942#endif
1943 return(0);
1944 }
1945
1946 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001947 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001948 */
1949 toconv = in->use;
1950 if (toconv == 0)
1951 return(0);
1952 if (toconv * 2 >= written) {
1953 xmlBufferGrow(out, toconv * 2);
1954 written = out->size - out->use - 1;
1955 }
1956 if (handler->output != NULL) {
1957 ret = handler->output(&out->content[out->use], &written,
1958 in->content, &toconv);
1959 xmlBufferShrink(in, toconv);
1960 out->use += written;
1961 writtentot += written;
1962 out->content[out->use] = 0;
1963 }
1964#ifdef LIBXML_ICONV_ENABLED
1965 else if (handler->iconv_out != NULL) {
1966 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1967 &written, in->content, &toconv);
1968 xmlBufferShrink(in, toconv);
1969 out->use += written;
1970 writtentot += written;
1971 out->content[out->use] = 0;
1972 if (ret == -1) {
1973 if (written > 0) {
1974 /*
1975 * Can be a limitation of iconv
1976 */
1977 goto retry;
1978 }
1979 ret = -3;
1980 }
1981 }
1982#endif /* LIBXML_ICONV_ENABLED */
1983 else {
1984 xmlGenericError(xmlGenericErrorContext,
1985 "xmlCharEncOutFunc: no output function !\n");
1986 return(-1);
1987 }
1988
1989 if (ret >= 0) output += ret;
1990
1991 /*
1992 * Attempt to handle error cases
1993 */
1994 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001995 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001996#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00001997 xmlGenericError(xmlGenericErrorContext,
1998 "converted %d bytes to %d bytes of output\n",
1999 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002000#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002001 break;
2002 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002003#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002004 xmlGenericError(xmlGenericErrorContext,
2005 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002006#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002007 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002008 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002009#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002010 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2011 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002012#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002013 break;
2014 case -2: {
2015 int len = in->use;
2016 const xmlChar *utf = (const xmlChar *) in->content;
2017 int cur;
2018
2019 cur = xmlGetUTF8Char(utf, &len);
2020 if (cur > 0) {
2021 xmlChar charref[20];
2022
2023#ifdef DEBUG_ENCODING
2024 xmlGenericError(xmlGenericErrorContext,
2025 "handling output conversion error\n");
2026 xmlGenericError(xmlGenericErrorContext,
2027 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2028 in->content[0], in->content[1],
2029 in->content[2], in->content[3]);
2030#endif
2031 /*
2032 * Removes the UTF8 sequence, and replace it by a charref
2033 * and continue the transcoding phase, hoping the error
2034 * did not mangle the encoder state.
2035 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002036 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002037 xmlBufferShrink(in, len);
2038 xmlBufferAddHead(in, charref, -1);
2039
2040 goto retry;
2041 } else {
2042 xmlGenericError(xmlGenericErrorContext,
2043 "output conversion failed due to conv error\n");
2044 xmlGenericError(xmlGenericErrorContext,
2045 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2046 in->content[0], in->content[1],
2047 in->content[2], in->content[3]);
2048 in->content[0] = ' ';
2049 }
2050 break;
2051 }
2052 }
2053 return(ret);
2054}
2055
2056/**
2057 * xmlCharEncCloseFunc:
2058 * @handler: char enconding transformation data structure
2059 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002060 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002061 *
2062 * Returns 0 if success, or -1 in case of error
2063 */
2064int
2065xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2066 int ret = 0;
2067 if (handler == NULL) return(-1);
2068 if (handler->name == NULL) return(-1);
2069#ifdef LIBXML_ICONV_ENABLED
2070 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002071 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002072 * and the associated icon resources.
2073 */
2074 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2075 if (handler->name != NULL)
2076 xmlFree(handler->name);
2077 handler->name = NULL;
2078 if (handler->iconv_out != NULL) {
2079 if (iconv_close(handler->iconv_out))
2080 ret = -1;
2081 handler->iconv_out = NULL;
2082 }
2083 if (handler->iconv_in != NULL) {
2084 if (iconv_close(handler->iconv_in))
2085 ret = -1;
2086 handler->iconv_in = NULL;
2087 }
2088 xmlFree(handler);
2089 }
2090#endif /* LIBXML_ICONV_ENABLED */
2091#ifdef DEBUG_ENCODING
2092 if (ret)
2093 xmlGenericError(xmlGenericErrorContext,
2094 "failed to close the encoding handler\n");
2095 else
2096 xmlGenericError(xmlGenericErrorContext,
2097 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002098#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002099
Owen Taylor3473f882001-02-23 17:55:21 +00002100 return(ret);
2101}
2102
Daniel Veillard36711902004-02-11 13:25:26 +00002103/**
2104 * xmlByteConsumed:
2105 * @ctxt: an XML parser context
2106 *
2107 * This function provides the current index of the parser relative
2108 * to the start of the current entity. This function is computed in
2109 * bytes from the beginning starting at zero and finishing at the
2110 * size in byte of the file if parsing a file. The function is
2111 * of constant cost if the input is UTF-8 but can be costly if run
2112 * on non-UTF-8 input.
2113 *
2114 * Returns the index in bytes from the beginning of the entity or -1
2115 * in case the index could not be computed.
2116 */
2117long
2118xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2119 xmlParserInputPtr in;
2120
2121 if (ctxt == NULL) return(-1);
2122 in = ctxt->input;
2123 if (in == NULL) return(-1);
2124 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2125 unsigned int unused = 0;
2126 xmlCharEncodingHandler * handler = in->buf->encoder;
2127 /*
2128 * Encoding conversion, compute the number of unused original
2129 * bytes from the input not consumed and substract that from
2130 * the raw consumed value, this is not a cheap operation
2131 */
2132 if (in->end - in->cur > 0) {
2133 static unsigned char convbuf[32000];
2134 unsigned char *cur = in->cur;
2135 int toconv = in->end - in->cur, written = 32000;
2136
2137 int ret;
2138
2139 if (handler->output != NULL) {
2140 do {
2141 toconv = in->end - cur;
2142 written = 32000;
2143 ret = handler->output(&convbuf[0], &written,
2144 cur, &toconv);
2145 if (ret == -1) return(-1);
2146 unused += written;
2147 cur += toconv;
2148 } while (ret == -2);
2149#ifdef LIBXML_ICONV_ENABLED
2150 } else if (handler->iconv_out != NULL) {
2151 do {
2152 toconv = in->end - cur;
2153 written = 32000;
2154 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2155 &written, cur, &toconv);
2156 if (ret == -1) {
2157 if (written > 0)
2158 ret = -2;
2159 else
2160 return(-1);
2161 }
2162 unused += written;
2163 cur += toconv;
2164 } while (ret == -2);
2165#endif
2166 } else {
2167 /* could not find a converter */
2168 return(-1);
2169 }
2170 }
2171 if (in->buf->rawconsumed < unused)
2172 return(-1);
2173 return(in->buf->rawconsumed - unused);
2174 }
2175 return(in->consumed + (in->cur - in->base));
2176}
2177
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002178#ifndef LIBXML_ICONV_ENABLED
2179#ifdef LIBXML_ISO8859X_ENABLED
2180
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * @xlattable layout: bytes 0..31 give, for each 2-byte lead (indexed by
 * its low 5 bits), the number of a 64-byte block; bytes 32..47 do the
 * same for 3-byte leads (low 4 bits). Blocks start at offset 48 and are
 * indexed by the low 6 bits of a trailing byte. For a 3-byte sequence
 * the first lookup yields another block number. A final value of 0
 * means that the character is not part of the target set.
 *
 * Conversion stops without error when @out is full; the caller can
 * resume from the position reported in @inlen.
 *
 * Returns 0 if success, -2 if the transcoding fails (invalid, truncated
 *         or unmappable sequence), or -1 otherwise
 * The value of @inlen after return is the number of octets consumed,
 *     which on failure is the offset of the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* end of the last sequence that was fully converted */
    const unsigned char* processed = in;
    int ret = 0;

    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned char d;

        /* every sequence yields exactly one byte: check room first */
        if (out >= outend)
            break;
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -2;
                break;
            }
            c = *in++;
            /* a trailing byte has the form 10xxxxxx */
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -2;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(ret);
}
2293
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: the code point of each byte 0x80..0xFF, indexed by
 *                (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.
 * Bytes below 0x80 are copied as is, the others are looked up in
 * @unicodetable and written as a 2 or 3 bytes UTF-8 sequence.
 * Conversion stops without error as soon as the next character does not
 * fit in @out; the caller can resume from the position reported in
 * @inlen.
 *
 * Returns 0 if success, or -1 otherwise (NULL argument or undefined
 *         code point in the input)
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) ||
        (inlen == NULL) || (unicodetable == NULL))
        return (-1);
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        /* only read a byte once it is known to be inside the input */
        unsigned int c = *in;

        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* table entries are 16 bits: 3 bytes are always enough */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (0);
}
2353
2354
2355/************************************************************************
2356 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2357 ************************************************************************/
2358
/*
 * ISO-8859-2 to Unicode: 128 code points, one per byte 0x80..0xFF,
 * stored at index (byte - 0x80). This is the shape ISO8859xToUTF8()
 * expects for its @unicodetable argument, where a 0 entry stands for an
 * undefined byte -- presumably the table is handed to it by a wrapper
 * outside this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2377
/*
 * Unicode to ISO-8859-2, in the two-level layout UTF8ToISO8859x() walks
 * through its @xlattable argument (presumably this table is handed to
 * it by a wrapper outside this part of the file):
 *   bytes  0..31  block number for each 2-byte UTF-8 lead (low 5 bits)
 *   bytes 32..47  block number for each 3-byte UTF-8 lead (low 4 bits)
 *   bytes 48..    6 blocks of 64 bytes indexed by the low 6 bits of a
 *                 trailing byte
 * Block 0 is all zeroes and a 0 result means "not in the character set".
 * The string literal fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2407
/*
 * ISO-8859-3 to Unicode: 128 code points, one per byte 0x80..0xFF,
 * stored at index (byte - 0x80). This is the shape ISO8859xToUTF8()
 * expects for its @unicodetable argument, where a 0 entry stands for an
 * undefined byte (this table has several) -- presumably the table is
 * handed to it by a wrapper outside this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2426
/*
 * Unicode to ISO-8859-3, in the two-level layout UTF8ToISO8859x() walks
 * through its @xlattable argument (presumably this table is handed to
 * it by a wrapper outside this part of the file):
 *   bytes  0..31  block number for each 2-byte UTF-8 lead (low 5 bits)
 *   bytes 32..47  block number for each 3-byte UTF-8 lead (low 4 bits)
 *   bytes 48..    7 blocks of 64 bytes indexed by the low 6 bits of a
 *                 trailing byte
 * Block 0 is all zeroes and a 0 result means "not in the character set".
 * The string literal fills the array exactly, without a terminating 0.
 *
 * NOTE(review): entry 0 (lead byte 0xC0) selects block 4, whose first
 * byte is 0xf0, a byte the ISO-8859-3 unicode table marks as undefined
 * (0x0000). This looks like undefined bytes having been recorded under
 * code point 0 when the table was generated -- confirm before relying
 * on it.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2460
/*
 * ISO-8859-4 to Unicode: 128 code points, one per byte 0x80..0xFF,
 * stored at index (byte - 0x80). This is the shape ISO8859xToUTF8()
 * expects for its @unicodetable argument, where a 0 entry stands for an
 * undefined byte -- presumably the table is handed to it by a wrapper
 * outside this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2479
/*
 * Unicode to ISO-8859-4, in the two-level layout UTF8ToISO8859x() walks
 * through its @xlattable argument (presumably this table is handed to
 * it by a wrapper outside this part of the file):
 *   bytes  0..31  block number for each 2-byte UTF-8 lead (low 5 bits)
 *   bytes 32..47  block number for each 3-byte UTF-8 lead (low 4 bits)
 *   bytes 48..    6 blocks of 64 bytes indexed by the low 6 bits of a
 *                 trailing byte
 * Block 0 is all zeroes and a 0 result means "not in the character set".
 * The string literal fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2509
/*
 * ISO-8859-5 to Unicode: 128 code points, one per byte 0x80..0xFF,
 * stored at index (byte - 0x80). This is the shape ISO8859xToUTF8()
 * expects for its @unicodetable argument, where a 0 entry stands for an
 * undefined byte -- presumably the table is handed to it by a wrapper
 * outside this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2528
/*
 * Unicode to ISO-8859-5, in the two-level layout UTF8ToISO8859x() walks
 * through its @xlattable argument (presumably this table is handed to
 * it by a wrapper outside this part of the file):
 *   bytes  0..31  block number for each 2-byte UTF-8 lead (low 5 bits)
 *   bytes 32..47  block number for each 3-byte UTF-8 lead (low 4 bits)
 *   bytes 48..    6 blocks of 64 bytes indexed by the low 6 bits of a
 *                 trailing byte; for a 3-byte sequence the first block
 *                 gives the number of the block used for the last byte
 * Block 0 is all zeroes and a 0 result means "not in the character set".
 * The string literal fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2558
/*
 * ISO-8859-6 to Unicode: 128 code points, one per byte 0x80..0xFF,
 * stored at index (byte - 0x80). This is the shape ISO8859xToUTF8()
 * expects for its @unicodetable argument, where a 0 entry stands for an
 * undefined byte (this table has many) -- presumably the table is
 * handed to it by a wrapper outside this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
2577
/*
 * Unicode to ISO-8859-6, in the two-level layout UTF8ToISO8859x() walks
 * through its @xlattable argument (presumably this table is handed to
 * it by a wrapper outside this part of the file):
 *   bytes  0..31  block number for each 2-byte UTF-8 lead (low 5 bits)
 *   bytes 32..47  block number for each 3-byte UTF-8 lead (low 4 bits)
 *   bytes 48..    5 blocks of 64 bytes indexed by the low 6 bits of a
 *                 trailing byte
 * Block 0 is all zeroes and a 0 result means "not in the character set".
 * The string literal fills the array exactly, without a terminating 0.
 *
 * NOTE(review): entry 0 (lead byte 0xC0) selects block 2, whose first
 * byte is 0xff, a byte the ISO-8859-6 unicode table marks as undefined
 * (0x0000). This looks like undefined bytes having been recorded under
 * code point 0 when the table was generated -- confirm before relying
 * on it.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2603
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 16-bit values for ISO-8859-7. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; a few entries are 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i, with 0x0000 marking a byte that has no mapping -- confirm
 * against ISO8859xToUTF8, which is defined outside this part of the file.
 */
2604static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2605 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2606 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2607 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2608 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2609 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2610 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2611 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2612 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2613 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2614 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2615 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2616 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2617 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2618 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2619 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2620 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2621};
2622
/*
 * xmltranscodetable_ISO8859_7:
 *
 * 48 + 7 * 64 bytes for ISO-8859-7, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 7 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2623static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2624 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2626 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2631 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2632 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2633 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2634 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2635 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2640 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2643 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2646 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2647 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2648 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2649 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2650 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2651 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2655};
2656
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 16-bit values for ISO-8859-8. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; entries 64..94 and a few others are
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i, with 0x0000 marking a byte that has no mapping -- confirm
 * against ISO8859xToUTF8, which is defined outside this part of the file.
 */
2657static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2658 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2659 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2660 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2661 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2662 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2663 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2664 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2665 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2666 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2667 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2668 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2669 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2670 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2671 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2672 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2673 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2674};
2675
/*
 * xmltranscodetable_ISO8859_8:
 *
 * 48 + 7 * 64 bytes for ISO-8859-8, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 7 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2676static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2677 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2679 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2684 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2685 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2686 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2687 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2688 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2693 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2696 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2698 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2701 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2703 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2706 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2708};
2709
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 16-bit values for ISO-8859-9. Entry i equals 0x0080 + i everywhere
 * except six slots holding 0x011e, 0x0130, 0x015e, 0x011f, 0x0131 and
 * 0x015f.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this part of the file.
 */
2710static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2711 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2712 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2713 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2714 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2715 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2716 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2717 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2718 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2719 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2720 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2721 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2722 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2723 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2724 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2725 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2726 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2727};
2728
/*
 * xmltranscodetable_ISO8859_9:
 *
 * 48 + 5 * 64 bytes for ISO-8859-9, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 5 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2729static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2730 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2731 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2732 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2733 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2734 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2735 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2736 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2737 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2738 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2739 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2740 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2741 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2742 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2743 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2744 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2753};
2754
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values for ISO-8859-10. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this part of the file.
 */
2755static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2756 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2757 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2758 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2759 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2760 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2761 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2762 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2763 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2764 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2765 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2766 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2767 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2768 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2769 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2770 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2771 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2772};
2773
/*
 * xmltranscodetable_ISO8859_10:
 *
 * 48 + 7 * 64 bytes for ISO-8859-10, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 7 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2774static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2775 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2777 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2782 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2783 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2784 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2785 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2786 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2787 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2788 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2789 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2790 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2792 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2793 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2794 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2797 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2803 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2804 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2805 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2806};
2807
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values for ISO-8859-11. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; eight entries are 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i, with 0x0000 marking a byte that has no mapping -- confirm
 * against ISO8859xToUTF8, which is defined outside this part of the file.
 */
2808static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2809 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2810 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2811 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2812 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2813 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2814 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2815 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2816 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2817 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2818 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2819 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2820 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2821 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2822 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2823 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2824 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2825};
2826
/*
 * xmltranscodetable_ISO8859_11:
 *
 * 48 + 6 * 64 bytes for ISO-8859-11, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 6 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2827static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2828 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2830 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2833 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2835 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2836 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2837 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2842 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2843 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2844 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2845 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2846 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2847 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2851 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2852 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2853 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855};
2856
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values for ISO-8859-13. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this part of the file.
 */
2857static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2858 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2859 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2860 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2861 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2862 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2863 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2864 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2865 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2866 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2867 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2868 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2869 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2870 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2871 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2872 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2873 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2874};
2875
/*
 * xmltranscodetable_ISO8859_13:
 *
 * 48 + 7 * 64 bytes for ISO-8859-13, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 7 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2876static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2877 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2883 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2884 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2885 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2886 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2887 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2888 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2894 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2895 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2897 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2898 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2899 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2900 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2901 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2902 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2903 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2904 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2905 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2907 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2908};
2909
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values for ISO-8859-14. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this part of the file.
 */
2910static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2911 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2912 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2913 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2914 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2915 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2916 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2917 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2918 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2919 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2920 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2921 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2922 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2923 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2924 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2925 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2926 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2927};
2928
/*
 * xmltranscodetable_ISO8859_14:
 *
 * 48 + 10 * 64 bytes for ISO-8859-14, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 10 blocks
 * of 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2929static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2930 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2931 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2932 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2935 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2938 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2939 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2940 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
2945 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
2946 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
2947 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2948 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2964 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
2965 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
2967 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2970 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
2971 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2972 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
2973};
2974
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values for ISO-8859-15. Entry i equals 0x0080 + i everywhere
 * except eight slots holding 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e,
 * 0x0152, 0x0153 and 0x0178.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this part of the file.
 */
2975static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
2976 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2977 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2978 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2979 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2980 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
2981 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2982 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
2983 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
2984 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2985 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2986 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2987 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2988 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2989 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2990 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2991 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
2992};
2993
/*
 * xmltranscodetable_ISO8859_15:
 *
 * 48 + 6 * 64 bytes for ISO-8859-15, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 6 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
2994static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
2995 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3003 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3004 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3005 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3006 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3017 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3018 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3019 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3020 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3021 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3022};
3023
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values for ISO-8859-16. Entries 0..31 are 0x0080..0x009f in
 * order and entry 32 is 0x00a0; no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this part of the file.
 */
3024static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3025 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3026 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3027 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3028 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3029 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3030 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3031 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3032 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3033 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3034 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3035 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3036 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3037 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3038 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3039 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3040 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3041};
3042
/*
 * xmltranscodetable_ISO8859_16:
 *
 * 48 + 9 * 64 bytes for ISO-8859-16, written as adjacent string literals
 * that fill the array exactly, so no terminating NUL is stored (legal in
 * C, an error in C++).
 * NOTE(review): the size suggests a 48-byte index followed by 9 blocks of
 * 64 bytes, presumably walked by UTF8ToISO8859x the way the ISO-8859-2
 * table is in UTF8ToISO8859_2 -- confirm against that helper, which is
 * defined outside this part of the file.
 */
3043static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3044 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3051 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3052 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3053 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3054 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3055 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3056 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3061 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3063 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3079 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3080 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3081 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3082 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3083};
3084
3085
3086/*
3087 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3088 */
3089
3090static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3091 const unsigned char* in, int *inlen) {
3092 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3093}
3094static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3095 const unsigned char* in, int *inlen) {
3096 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3097}
3098
3099static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3100 const unsigned char* in, int *inlen) {
3101 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3102}
3103static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3104 const unsigned char* in, int *inlen) {
3105 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3106}
3107
3108static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3109 const unsigned char* in, int *inlen) {
3110 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3111}
3112static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3113 const unsigned char* in, int *inlen) {
3114 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3115}
3116
3117static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3118 const unsigned char* in, int *inlen) {
3119 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3120}
3121static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3122 const unsigned char* in, int *inlen) {
3123 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3124}
3125
3126static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3127 const unsigned char* in, int *inlen) {
3128 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3129}
3130static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3131 const unsigned char* in, int *inlen) {
3132 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3133}
3134
3135static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3136 const unsigned char* in, int *inlen) {
3137 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3138}
3139static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3140 const unsigned char* in, int *inlen) {
3141 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3142}
3143
3144static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3145 const unsigned char* in, int *inlen) {
3146 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3147}
3148static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3149 const unsigned char* in, int *inlen) {
3150 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3151}
3152
3153static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3154 const unsigned char* in, int *inlen) {
3155 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3156}
3157static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3158 const unsigned char* in, int *inlen) {
3159 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3160}
3161
3162static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3163 const unsigned char* in, int *inlen) {
3164 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3165}
3166static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3167 const unsigned char* in, int *inlen) {
3168 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3169}
3170
3171static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3172 const unsigned char* in, int *inlen) {
3173 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3174}
3175static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3176 const unsigned char* in, int *inlen) {
3177 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3178}
3179
3180static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3181 const unsigned char* in, int *inlen) {
3182 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3183}
3184static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3185 const unsigned char* in, int *inlen) {
3186 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3187}
3188
3189static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3190 const unsigned char* in, int *inlen) {
3191 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3192}
3193static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3194 const unsigned char* in, int *inlen) {
3195 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3196}
3197
3198static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3199 const unsigned char* in, int *inlen) {
3200 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3201}
3202static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3203 const unsigned char* in, int *inlen) {
3204 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3205}
3206
3207static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3208 const unsigned char* in, int *inlen) {
3209 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3210}
3211static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3212 const unsigned char* in, int *inlen) {
3213 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3214}
3215
3216static void
3217xmlRegisterCharEncodingHandlersISO8859x (void) {
3218 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3219 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3220 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3221 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3222 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3223 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3224 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3225 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3226 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3227 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3228 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3229 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3230 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3231 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3232}
3233
3234#endif
3235#endif
3236
3237