blob: d38d4c9ecb63c68c5ffe9bda410d12462b66c5ec [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
20 * UTF8 string routines from:
21 * "William M. Brack" <wbrack@mmm.com.hk>
22 *
23 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000024 */
25
Daniel Veillard34ce8be2002-03-18 19:37:11 +000026#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000027#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000028
Owen Taylor3473f882001-02-23 17:55:21 +000029#include <string.h>
30
31#ifdef HAVE_CTYPE_H
32#include <ctype.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
Owen Taylor3473f882001-02-23 17:55:21 +000037#ifdef LIBXML_ICONV_ENABLED
38#ifdef HAVE_ERRNO_H
39#include <errno.h>
40#endif
41#endif
42#include <libxml/encoding.h>
43#include <libxml/xmlmemory.h>
44#ifdef LIBXML_HTML_ENABLED
45#include <libxml/HTMLparser.h>
46#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000047#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000048#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * Handlers for the two UTF-16 byte orders. Both start out as NULL.
 * NOTE(review): presumably filled in when the default handlers are
 * registered - confirm against the registration code.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/*
 * One entry of the encoding alias table: a pair of strings.
 * NOTE(review): presumably @alias is the alternate spelling and @name the
 * encoding name it stands for - confirm against the alias lookup code.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/*
 * The alias table itself, initially empty.
 * NOTE(review): Nb/Max look like the usual "entries in use" / "entries
 * allocated" counters - confirm against the code that grows the table.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
63
64#ifdef LIBXML_ICONV_ENABLED
65#if 0
66#define DEBUG_ENCODING /* Define this to get encoding traces */
67#endif
William M. Brack16db7b62003-08-07 13:12:49 +000068#else
69#ifdef LIBXML_ISO8859X_ENABLED
70static void xmlRegisterCharEncodingHandlersISO8859x (void);
71#endif
Owen Taylor3473f882001-02-23 17:55:21 +000072#endif
73
/*
 * Byte-order flag consulted by UTF-16 conversion routines in this file to
 * choose between loading/storing a 16-bit unit directly and assembling it
 * byte by byte. Initialized to 1 here.
 * NOTE(review): presumably re-detected at initialization time - confirm.
 */
static int xmlLittleEndian = 1;
75
/************************************************************************
 *									*
 *		Generic UTF8 handling routines				*
 *									*
 * From rfc2044: encoding of the Unicode values on UTF-8:		*
 *									*
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)		*
 * 0000 0000-0000 007F   0xxxxxxx					*
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx				*
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx			*
 * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx		*
 *									*
 * Values above 0xFFFF use the 4-byte form; several of the routines	*
 * below decode or skip such sequences as well.			*
 *									*
 ************************************************************************/
Owen Taylor3473f882001-02-23 17:55:21 +000090
91/**
William M. Brack4a557d92003-07-29 04:28:04 +000092 * xmlUTF8Size:
93 * @utf: pointer to the UTF8 character
94 *
William M. Brack7a821652003-08-15 07:27:40 +000095 * calulates the internal size of a UTF8 character
96 *
William M. Brack4a557d92003-07-29 04:28:04 +000097 * returns the numbers of bytes in the character, -1 on format error
98 */
99int
100xmlUTF8Size(const xmlChar *utf) {
101 xmlChar mask;
102 int len;
103
104 if (utf == NULL)
105 return -1;
106 if (*utf < 0x80)
107 return 1;
108 /* check valid UTF8 character */
109 if (!(*utf & 0x40))
110 return -1;
111 /* determine number of bytes in char */
112 len = 2;
113 for (mask=0x20; mask != 0; mask>>=1) {
114 if (!(*utf & mask))
115 return len;
116 len++;
117 }
118 return -1;
119}
120
121/**
William M. Brack7a821652003-08-15 07:27:40 +0000122 * xmlUTF8Charcmp:
William M. Brack4a557d92003-07-29 04:28:04 +0000123 * @utf1: pointer to first UTF8 char
124 * @utf2: pointer to second UTF8 char
125 *
William M. Brack7a821652003-08-15 07:27:40 +0000126 * compares the two UCS4 values
127 *
128 * returns result of the compare as with xmlStrncmp
William M. Brack4a557d92003-07-29 04:28:04 +0000129 */
130int
131xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
132
133 if (utf1 == NULL ) {
134 if (utf2 == NULL)
135 return 0;
136 return -1;
137 }
Daniel Veillard9ff7de12003-07-29 13:30:42 +0000138 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
William M. Brack4a557d92003-07-29 04:28:04 +0000139}
140
141/**
Daniel Veillarde043ee12001-04-16 14:08:07 +0000142 * xmlUTF8Strlen:
143 * @utf: a sequence of UTF-8 encoded bytes
144 *
Daniel Veillard60087f32001-10-10 09:45:09 +0000145 * compute the length of an UTF8 string, it doesn't do a full UTF8
Daniel Veillarde043ee12001-04-16 14:08:07 +0000146 * checking of the content of the string.
147 *
148 * Returns the number of characters in the string or -1 in case of error
149 */
150int
Daniel Veillard97ac1312001-05-30 19:14:17 +0000151xmlUTF8Strlen(const xmlChar *utf) {
Daniel Veillarde043ee12001-04-16 14:08:07 +0000152 int ret = 0;
153
154 if (utf == NULL)
155 return(-1);
156
157 while (*utf != 0) {
158 if (utf[0] & 0x80) {
159 if ((utf[1] & 0xc0) != 0x80)
160 return(-1);
161 if ((utf[0] & 0xe0) == 0xe0) {
162 if ((utf[2] & 0xc0) != 0x80)
163 return(-1);
164 if ((utf[0] & 0xf0) == 0xf0) {
165 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
166 return(-1);
167 utf += 4;
168 } else {
169 utf += 3;
170 }
171 } else {
172 utf += 2;
173 }
174 } else {
175 utf++;
176 }
177 ret++;
178 }
179 return(ret);
180}
181
/**
 * xmlGetUTF8Char:
 * @utf: a sequence of UTF-8 encoded bytes
 * @len: on input the number of bytes available at @utf, on output the
 *       number of bytes used by the decoded character (0 on error)
 *
 * Read one UTF8 Char from @utf. Sequences of 1 to 4 bytes are decoded;
 * the expected continuation bytes must be present and must fit in the
 * @len bytes available. Overlong forms and the 0x10FFFF limit are not
 * checked.
 *
 * Returns the char value or -1 in case of error (NULL argument, not
 * enough bytes, continuation byte in lead position, bad continuation
 * byte or lead byte announcing more than 4 bytes). @len may be NULL, in
 * which case -1 is returned and nothing is written.
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /* 10xxxxxx can only continue a sequence, never start one */
        if ((c & 0x40) == 0)
            goto error;
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    /* this label is also reached when len itself is NULL */
    if (len != NULL)
        *len = 0;
    return(-1);
}
248
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative utf-8 encoded string.
 *
 * Checks @utf for being valid utf-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer utf-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Every character must start with a proper lead byte (0xxxxxxx,
 * 110xxxxx, 1110xxxx or 11110xxx) followed by the matching number of
 * 10xxxxxx continuation bytes.
 *
 * Return value: true (1) if @utf is valid, 0 otherwise or if @utf is NULL.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix;
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * The continuation bytes are tested left to right, so the terminating
     * 0 always fails a test before anything behind it is read.
     */
    for (ix = 0; (c = utf[ix]) != 0;) {
        if ((c & 0x80) == 0x00) {
            /* 1-byte code */
            ix++;
        } else if ((c & 0xe0) == 0xc0) {
            /* 2-byte code */
            if ((utf[ix + 1] & 0xc0) != 0x80)
                return(0);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {
            /* 3-byte code */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80))
                return(0);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {
            /* 4-byte code */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80) ||
                ((utf[ix + 3] & 0xc0) != 0x80))
                return(0);
            ix += 4;
        } else {
            /* stray continuation byte or lead byte of a longer form */
            return(0);
        }
    }
    return(1);
}
292
293/**
Daniel Veillard97ac1312001-05-30 19:14:17 +0000294 * xmlUTF8Strsize:
295 * @utf: a sequence of UTF-8 encoded bytes
296 * @len: the number of characters in the array
297 *
298 * storage size of an UTF8 string
299 *
300 * Returns the storage size of
301 * the first 'len' characters of ARRAY
302 *
303 */
304
305int
306xmlUTF8Strsize(const xmlChar *utf, int len) {
307 const xmlChar *ptr=utf;
308 xmlChar ch;
309
310 if (len <= 0)
311 return(0);
312
313 while ( len-- > 0) {
314 if ( !*ptr )
315 break;
316 if ( (ch = *ptr++) & 0x80)
317 while ( (ch<<=1) & 0x80 )
318 ptr++;
319 }
320 return (ptr - utf);
321}
322
323
324/**
325 * xmlUTF8Strndup:
326 * @utf: the input UTF8 *
327 * @len: the len of @utf (in chars)
328 *
329 * a strndup for array of UTF8's
330 *
331 * Returns a new UTF8 * or NULL
332 */
333xmlChar *
334xmlUTF8Strndup(const xmlChar *utf, int len) {
335 xmlChar *ret;
336 int i;
337
338 if ((utf == NULL) || (len < 0)) return(NULL);
339 i = xmlUTF8Strsize(utf, len);
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000340 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
Daniel Veillard97ac1312001-05-30 19:14:17 +0000341 if (ret == NULL) {
342 xmlGenericError(xmlGenericErrorContext,
343 "malloc of %ld byte failed\n",
344 (len + 1) * (long)sizeof(xmlChar));
345 return(NULL);
346 }
347 memcpy(ret, utf, i * sizeof(xmlChar));
348 ret[i] = 0;
349 return(ret);
350}
351
352/**
353 * xmlUTF8Strpos:
354 * @utf: the input UTF8 *
355 * @pos: the position of the desired UTF8 char (in chars)
356 *
357 * a function to provide the equivalent of fetching a
358 * character from a string array
359 *
360 * Returns a pointer to the UTF8 character or NULL
361 */
362xmlChar *
363xmlUTF8Strpos(const xmlChar *utf, int pos) {
364 xmlChar ch;
365
366 if (utf == NULL) return(NULL);
367 if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
368 return(NULL);
369 while (pos--) {
370 if ((ch=*utf++) == 0) return(NULL);
371 if ( ch & 0x80 ) {
372 /* if not simple ascii, verify proper format */
373 if ( (ch & 0xc0) != 0xc0 )
374 return(NULL);
375 /* then skip over remaining bytes for this char */
376 while ( (ch <<= 1) & 0x80 )
377 if ( (*utf++ & 0xc0) != 0x80 )
378 return(NULL);
379 }
380 }
381 return((xmlChar *)utf);
382}
383
384/**
385 * xmlUTF8Strloc:
386 * @utf: the input UTF8 *
387 * @utfchar: the UTF8 character to be found
388 *
389 * a function to provide relative location of a UTF8 char
390 *
391 * Returns the relative character position of the desired char
392 * or -1 if not found
393 */
394int
395xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
396 int i, size;
397 xmlChar ch;
398
399 if (utf==NULL || utfchar==NULL) return -1;
400 size = xmlUTF8Strsize(utfchar, 1);
401 for(i=0; (ch=*utf) != 0; i++) {
402 if (xmlStrncmp(utf, utfchar, size)==0)
403 return(i);
404 utf++;
405 if ( ch & 0x80 ) {
406 /* if not simple ascii, verify proper format */
407 if ( (ch & 0xc0) != 0xc0 )
408 return(-1);
409 /* then skip over remaining bytes for this char */
410 while ( (ch <<= 1) & 0x80 )
411 if ( (*utf++ & 0xc0) != 0x80 )
412 return(-1);
413 }
414 }
415
416 return(-1);
417}
418/**
419 * xmlUTF8Strsub:
420 * @utf: a sequence of UTF-8 encoded bytes
Daniel Veillard97ac1312001-05-30 19:14:17 +0000421 * @start: relative pos of first char
422 * @len: total number to copy
423 *
424 * Note: positions are given in units of UTF-8 chars
425 *
426 * Returns a pointer to a newly created string
427 * or NULL if any problem
428 */
429
430xmlChar *
431xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
432 int i;
433 xmlChar ch;
434
435 if (utf == NULL) return(NULL);
436 if (start < 0) return(NULL);
437 if (len < 0) return(NULL);
438
439 /*
440 * Skip over any leading chars
441 */
442 for (i = 0;i < start;i++) {
443 if ((ch=*utf++) == 0) return(NULL);
444 if ( ch & 0x80 ) {
445 /* if not simple ascii, verify proper format */
446 if ( (ch & 0xc0) != 0xc0 )
447 return(NULL);
448 /* then skip over remaining bytes for this char */
449 while ( (ch <<= 1) & 0x80 )
450 if ( (*utf++ & 0xc0) != 0x80 )
451 return(NULL);
452 }
453 }
454
455 return(xmlUTF8Strndup(utf, len));
456}
457
458/************************************************************************
459 * *
460 * Conversions To/From UTF8 encoding *
461 * *
462 ************************************************************************/
463
/**
 * asciiToUTF8:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a pointer to an array of ASCII chars
 * @inlen: the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. As ASCII is a subset of UTF-8 the bytes are copied
 * unchanged. Conversion only goes on while more than 5 bytes are free in
 * @out, so the last 5 bytes of the output block are never used.
 *
 * Returns 0 if success, or -1 if a byte outside of the ASCII range
 * (>= 0x80) was met.
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced;
 * both are also set when -1 is returned.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    const unsigned char* inend = in + (*inlen);
    unsigned int c;

    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = *in;
        if (c >= 0x80) {
            /* not an ASCII char, it is left unconsumed */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = c;
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(0);
}
514
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000515#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a pointer to an array of UTF-8 chars
 * @inlen: the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is produced and 0 is
 * returned. Conversion stops, still returning 0, when @out is full or
 * when the input ends in the middle of a multi-byte sequence; the
 * incomplete sequence is not consumed.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character which is not in the ASCII range).
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced;
 * both are also set when -2 is returned and then cover the characters
 * converted before the faulty one.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* incomplete sequence at the end of the block: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte which is not a continuation byte makes the whole
             * sequence invalid; it must not be swallowed silently.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(0);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000598#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000599
/**
 * isolat1ToUTF8:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a pointer to an array of ISO Latin 1 chars
 * @inlen: the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, the others are expanded
 * to 2 bytes. Conversion stops when the next char does not fit anymore in
 * @out.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL.
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instop;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);
    instop = inend;

    /*
     * *in is only read after in has been compared with its limit, so
     * nothing at or behind inend is ever accessed.
     */
    while ((in < inend) && (outend - out > 1)) {
        if (*in >= 0x80) {
            *out++ = ((*in >> 6) & 0x1F) | 0xC0;
            *out++ = (*in & 0x3F) | 0x80;
            ++in;
        }
        /* limit the run of plain chars to the room left in the output */
        if ((instop - in) > (outend - out))
            instop = in + (outend - out);
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* a last single byte char may still fit in the last output byte */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(0);
}
649
/**
 * UTF8ToUTF8:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @inb: a pointer to an array of UTF-8 chars
 * @inlenb: the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in both
 * blocks are copied with memcpy(), without looking at character
 * boundaries.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL or if the
 * smaller of the two lengths is negative.
 * The values of @outlen and @inlenb after a successful return are both
 * the number of bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if (out == NULL)
        return(-1);
    if (inb == NULL)
        return(-1);
    if ((outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* the smaller of the two block sizes */
    ncopy = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (ncopy < 0)
        return(-1);

    memcpy(out, inb, ncopy);

    *inlenb = ncopy;
    *outlen = ncopy;
    return(0);
}
686
Daniel Veillarde72c7562002-05-31 09:47:30 +0000687
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000688#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a pointer to an array of UTF-8 chars
 * @inlen: the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: nothing is produced and 0 is
 * returned. Conversion stops, still returning 0, when @out is full or
 * when the input ends in the middle of a multi-byte sequence; the
 * incomplete sequence is not consumed.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character above 0xFF).
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced;
 * both are also set when -2 is returned.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* const srcbase = in;
    const unsigned char* const dstbase = out;
    const unsigned char* done = in;
    const unsigned char* srcend;
    const unsigned char* dstend;
    unsigned int value, byte;
    int extra;
    int ret = 0;

    if (in == NULL) {
        /* initialization call, nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcend = in + (*inlen);
    dstend = out + (*outlen);
    while (in < srcend) {
        byte = *in++;
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            /* continuation byte where a lead byte is expected */
            ret = -2;
            goto finish;
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            value = byte & 0x07;
            extra = 3;
        } else {
            /* longer forms can never fit in IsoLat1 */
            ret = -2;
            goto finish;
        }

        /* the sequence is cut by the end of the block: leave it there */
        if (srcend - in < extra)
            break;

        while (extra > 0) {
            byte = *in++;
            if ((byte & 0xC0) != 0x80) {
                ret = -2;
                goto finish;
            }
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }

        /* value is now one complete character */
        if (value > 0xFF) {
            /* not representable in IsoLat1 */
            ret = -2;
            goto finish;
        }
        if (out >= dstend)
            break;
        *out++ = value;
        done = in;
    }

finish:
    *outlen = out - dstbase;
    *inlen = done - srcbase;
    return(ret);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000776#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000777
/**
 * UTF16LEToUTF8:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @inb: a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb: the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The code units are assembled from the individual
 * bytes of @inb, so the result depends neither on the byte order of the
 * host nor on the alignment of @inb.
 *
 * An odd trailing byte is ignored. Conversion only goes on while more
 * than 5 bytes are free in @out, and it stops before a surrogate pair
 * which is cut by the end of the block. Unconverted input is not counted
 * as consumed.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 * which is not followed by a low surrogate).
 * The value of @inlenb after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced;
 * both are also set when -2 is returned.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        /* low byte first */
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* second half not there yet: keep the first one for later */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
        }

        /* c now holds a single UCS-4 value */
        if (out >= outend)
            break;
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* continuation bytes, 6 bits each */
        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend)
                break;
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
865
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000866#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb: a pointer to an array of bytes to store the result
 * @outlen: the length of @outb
 * @in: a pointer to an array of UTF-8 chars
 * @inlen: the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The code units are stored byte by byte, low byte
 * first, so the result depends neither on the byte order of the host nor
 * on the alignment of @outb.
 *
 * A NULL @in is an initialization call: the Byte Order Mark FF FE is
 * written if @outb has room for it. Conversion stops, returning 0, when
 * the next char does not fit in @outb or when the input ends in the
 * middle of a multi-byte sequence; that char is not consumed.
 *
 * Returns 0 if success, 2 when the Byte Order Mark was written, or -2
 * if the transcoding failed (malformed UTF-8 or a value above 0x10FFFF).
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced;
 * both are also set when -2 is returned.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units are written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence at the end of the block: wait for the rest */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte which is not a continuation byte makes the whole
             * sequence invalid; it must not be swallowed silently.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = c & 0xFF;
            out[1] = (c >> 8) & 0xFF;
            out += 2;
        }
        else if (c < 0x110000) {
            /* surrogate pair, needs two units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = hi & 0xFF;
            out[1] = (hi >> 8) & 0xFF;
            out[2] = lo & 0xFF;
            out[3] = (lo >> 8) & 0xFF;
            out += 4;
        }
        else {
            /* beyond the range UTF-16 can express */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(0);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000986#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000987
/**
 * UTF16BEToUTF8:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @inb: a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb: the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The code units are assembled from the individual
 * bytes of @inb, so the result depends neither on the byte order of the
 * host nor on the alignment of @inb.
 *
 * An odd trailing byte is ignored. Conversion stops, returning 0, when
 * the next char does not fit completely in @out or before a surrogate
 * pair which is cut by the end of the block; that char is neither written
 * nor consumed.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 * which is not followed by a low surrogate).
 * The value of @inlenb after return is the number of octets consumed,
 * the value of @outlen after return is the number of octets produced;
 * both are also set when -2 is returned.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;
    int needed;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;
    while (in < inend) {
        /* high byte first */
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* second half not there yet: keep the first one for later */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
        }

        /* c now holds a single UCS-4 value: size of its UTF-8 form */
        if      (c <    0x80) { needed = 1; bits = -6; }
        else if (c <   0x800) { needed = 2; bits =  0; }
        else if (c < 0x10000) { needed = 3; bits =  6; }
        else                  { needed = 4; bits = 12; }

        /*
         * Never emit a part of a char only: the input would be reported
         * as consumed while the output ends with a truncated sequence.
         */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = c;
                break;
            case 2:
                *out++ = ((c >> 6) & 0x1F) | 0xC0;
                break;
            case 3:
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                break;
            default:
                *out++ = ((c >> 18) & 0x07) | 0xF0;
                break;
        }
        /* continuation bytes, 6 bits each */
        for ( ; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
1079
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001080#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes; on return, the number of bytes
 *           written
 * @in: a pointer to an array of UTF-8 chars, or NULL to request the
 *      Byte Order Mark
 * @inlen:  the length of @in; on return, the number of bytes consumed
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte, so the result
 * does not depend on the host byte order and @outb does not need to be
 * 16-bit aligned.
 *
 * When @in is NULL the Byte Order Mark FE FF is written if at least two
 * bytes are available.
 *
 * Returns 0 on success (conversion may stop early when the output is full
 *     or the input ends inside a multi-byte sequence), 2 when the Byte
 *     Order Mark was written, or -2 if the transcoding failed (stray or
 *     missing continuation byte, invalid lead byte, or a code point above
 *     0x10FFFF). On failure *outlen and *inlen describe what was
 *     converted before the offending sequence.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFE;
            outb[1] = 0xFF;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FEFF Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte here means the input is malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) c;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);         /* high surrogate */
            *out++ = (unsigned char) (d >> 8);
            *out++ = (unsigned char) d;
            d = 0xDC00 | (c & 0x03FF);      /* low surrogate */
            *out++ = (unsigned char) (d >> 8);
            *out++ = (unsigned char) d;
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(0);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001197#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001198
Daniel Veillard97ac1312001-05-30 19:14:17 +00001199/************************************************************************
1200 * *
1201 * Generic encoding handling routines *
1202 * *
1203 ************************************************************************/
1204
Owen Taylor3473f882001-02-23 17:55:21 +00001205/**
1206 * xmlDetectCharEncoding:
1207 * @in: a pointer to the first bytes of the XML entity, must be at least
1208 * 4 bytes long.
1209 * @len: pointer to the length of the buffer
1210 *
1211 * Guess the encoding of the entity using the first bytes of the entity content
1212 * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
1213 *
1214 * Returns one of the XML_CHAR_ENCODING_... values.
1215 */
1216xmlCharEncoding
1217xmlDetectCharEncoding(const unsigned char* in, int len)
1218{
1219 if (len >= 4) {
1220 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1221 (in[2] == 0x00) && (in[3] == 0x3C))
1222 return(XML_CHAR_ENCODING_UCS4BE);
1223 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
1224 (in[2] == 0x00) && (in[3] == 0x00))
1225 return(XML_CHAR_ENCODING_UCS4LE);
1226 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1227 (in[2] == 0x3C) && (in[3] == 0x00))
1228 return(XML_CHAR_ENCODING_UCS4_2143);
1229 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
1230 (in[2] == 0x00) && (in[3] == 0x00))
1231 return(XML_CHAR_ENCODING_UCS4_3412);
1232 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
1233 (in[2] == 0xA7) && (in[3] == 0x94))
1234 return(XML_CHAR_ENCODING_EBCDIC);
1235 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
1236 (in[2] == 0x78) && (in[3] == 0x6D))
1237 return(XML_CHAR_ENCODING_UTF8);
1238 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001239 if (len >= 3) {
1240 /*
1241 * Errata on XML-1.0 June 20 2001
1242 * We now allow an UTF8 encoded BOM
1243 */
1244 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1245 (in[2] == 0xBF))
1246 return(XML_CHAR_ENCODING_UTF8);
1247 }
Owen Taylor3473f882001-02-23 17:55:21 +00001248 if (len >= 2) {
1249 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1250 return(XML_CHAR_ENCODING_UTF16BE);
1251 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1252 return(XML_CHAR_ENCODING_UTF16LE);
1253 }
1254 return(XML_CHAR_ENCODING_NONE);
1255}
1256
1257/**
1258 * xmlCleanupEncodingAliases:
1259 *
1260 * Unregisters all aliases
1261 */
1262void
1263xmlCleanupEncodingAliases(void) {
1264 int i;
1265
1266 if (xmlCharEncodingAliases == NULL)
1267 return;
1268
1269 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1270 if (xmlCharEncodingAliases[i].name != NULL)
1271 xmlFree((char *) xmlCharEncodingAliases[i].name);
1272 if (xmlCharEncodingAliases[i].alias != NULL)
1273 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1274 }
1275 xmlCharEncodingAliasesNb = 0;
1276 xmlCharEncodingAliasesMax = 0;
1277 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +00001278 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001279}
1280
1281/**
1282 * xmlGetEncodingAlias:
1283 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1284 *
1285 * Lookup an encoding name for the given alias.
1286 *
1287 * Returns NULL if not found the original name otherwise
1288 */
1289const char *
1290xmlGetEncodingAlias(const char *alias) {
1291 int i;
1292 char upper[100];
1293
1294 if (alias == NULL)
1295 return(NULL);
1296
1297 if (xmlCharEncodingAliases == NULL)
1298 return(NULL);
1299
1300 for (i = 0;i < 99;i++) {
1301 upper[i] = toupper(alias[i]);
1302 if (upper[i] == 0) break;
1303 }
1304 upper[i] = 0;
1305
1306 /*
1307 * Walk down the list looking for a definition of the alias
1308 */
1309 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1310 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1311 return(xmlCharEncodingAliases[i].name);
1312 }
1313 }
1314 return(NULL);
1315}
1316
1317/**
1318 * xmlAddEncodingAlias:
1319 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1320 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1321 *
1322 * Registers and alias @alias for an encoding named @name. Existing alias
1323 * will be overwritten.
1324 *
1325 * Returns 0 in case of success, -1 in case of error
1326 */
1327int
1328xmlAddEncodingAlias(const char *name, const char *alias) {
1329 int i;
1330 char upper[100];
1331
1332 if ((name == NULL) || (alias == NULL))
1333 return(-1);
1334
1335 for (i = 0;i < 99;i++) {
1336 upper[i] = toupper(alias[i]);
1337 if (upper[i] == 0) break;
1338 }
1339 upper[i] = 0;
1340
1341 if (xmlCharEncodingAliases == NULL) {
1342 xmlCharEncodingAliasesNb = 0;
1343 xmlCharEncodingAliasesMax = 20;
1344 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1345 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1346 if (xmlCharEncodingAliases == NULL)
1347 return(-1);
1348 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1349 xmlCharEncodingAliasesMax *= 2;
1350 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1351 xmlRealloc(xmlCharEncodingAliases,
1352 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1353 }
1354 /*
1355 * Walk down the list looking for a definition of the alias
1356 */
1357 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1358 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1359 /*
1360 * Replace the definition.
1361 */
1362 xmlFree((char *) xmlCharEncodingAliases[i].name);
1363 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1364 return(0);
1365 }
1366 }
1367 /*
1368 * Add the definition
1369 */
1370 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1371 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1372 xmlCharEncodingAliasesNb++;
1373 return(0);
1374}
1375
1376/**
1377 * xmlDelEncodingAlias:
1378 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1379 *
1380 * Unregisters an encoding alias @alias
1381 *
1382 * Returns 0 in case of success, -1 in case of error
1383 */
1384int
1385xmlDelEncodingAlias(const char *alias) {
1386 int i;
1387
1388 if (alias == NULL)
1389 return(-1);
1390
1391 if (xmlCharEncodingAliases == NULL)
1392 return(-1);
1393 /*
1394 * Walk down the list looking for a definition of the alias
1395 */
1396 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1397 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1398 xmlFree((char *) xmlCharEncodingAliases[i].name);
1399 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1400 xmlCharEncodingAliasesNb--;
1401 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1402 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1403 return(0);
1404 }
1405 }
1406 return(-1);
1407}
1408
1409/**
1410 * xmlParseCharEncoding:
1411 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1412 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001413 * Compare the string to the known encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001414 * that the comparison is case insensitive accordingly to the section
1415 * [XML] 4.3.3 Character Encoding in Entities.
1416 *
1417 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1418 * if not recognized.
1419 */
1420xmlCharEncoding
1421xmlParseCharEncoding(const char* name)
1422{
1423 const char *alias;
1424 char upper[500];
1425 int i;
1426
1427 if (name == NULL)
1428 return(XML_CHAR_ENCODING_NONE);
1429
1430 /*
1431 * Do the alias resolution
1432 */
1433 alias = xmlGetEncodingAlias(name);
1434 if (alias != NULL)
1435 name = alias;
1436
1437 for (i = 0;i < 499;i++) {
1438 upper[i] = toupper(name[i]);
1439 if (upper[i] == 0) break;
1440 }
1441 upper[i] = 0;
1442
1443 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1444 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1445 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1446
1447 /*
1448 * NOTE: if we were able to parse this, the endianness of UTF16 is
1449 * already found and in use
1450 */
1451 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1452 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1453
1454 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1455 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1456 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1457
1458 /*
1459 * NOTE: if we were able to parse this, the endianness of UCS4 is
1460 * already found and in use
1461 */
1462 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1463 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1464 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1465
1466
1467 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1468 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1469 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1470
1471 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1472 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1473 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1474
1475 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1476 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1477 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1478 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1479 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1480 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1481 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1482
1483 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1484 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1485 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1486
1487#ifdef DEBUG_ENCODING
1488 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1489#endif
1490 return(XML_CHAR_ENCODING_ERROR);
1491}
1492
1493/**
1494 * xmlGetCharEncodingName:
1495 * @enc: the encoding
1496 *
1497 * The "canonical" name for XML encoding.
1498 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1499 * Section 4.3.3 Character Encoding in Entities
1500 *
1501 * Returns the canonical name for the given encoding
1502 */
1503
1504const char*
1505xmlGetCharEncodingName(xmlCharEncoding enc) {
1506 switch (enc) {
1507 case XML_CHAR_ENCODING_ERROR:
1508 return(NULL);
1509 case XML_CHAR_ENCODING_NONE:
1510 return(NULL);
1511 case XML_CHAR_ENCODING_UTF8:
1512 return("UTF-8");
1513 case XML_CHAR_ENCODING_UTF16LE:
1514 return("UTF-16");
1515 case XML_CHAR_ENCODING_UTF16BE:
1516 return("UTF-16");
1517 case XML_CHAR_ENCODING_EBCDIC:
1518 return("EBCDIC");
1519 case XML_CHAR_ENCODING_UCS4LE:
1520 return("ISO-10646-UCS-4");
1521 case XML_CHAR_ENCODING_UCS4BE:
1522 return("ISO-10646-UCS-4");
1523 case XML_CHAR_ENCODING_UCS4_2143:
1524 return("ISO-10646-UCS-4");
1525 case XML_CHAR_ENCODING_UCS4_3412:
1526 return("ISO-10646-UCS-4");
1527 case XML_CHAR_ENCODING_UCS2:
1528 return("ISO-10646-UCS-2");
1529 case XML_CHAR_ENCODING_8859_1:
1530 return("ISO-8859-1");
1531 case XML_CHAR_ENCODING_8859_2:
1532 return("ISO-8859-2");
1533 case XML_CHAR_ENCODING_8859_3:
1534 return("ISO-8859-3");
1535 case XML_CHAR_ENCODING_8859_4:
1536 return("ISO-8859-4");
1537 case XML_CHAR_ENCODING_8859_5:
1538 return("ISO-8859-5");
1539 case XML_CHAR_ENCODING_8859_6:
1540 return("ISO-8859-6");
1541 case XML_CHAR_ENCODING_8859_7:
1542 return("ISO-8859-7");
1543 case XML_CHAR_ENCODING_8859_8:
1544 return("ISO-8859-8");
1545 case XML_CHAR_ENCODING_8859_9:
1546 return("ISO-8859-9");
1547 case XML_CHAR_ENCODING_2022_JP:
1548 return("ISO-2022-JP");
1549 case XML_CHAR_ENCODING_SHIFT_JIS:
1550 return("Shift-JIS");
1551 case XML_CHAR_ENCODING_EUC_JP:
1552 return("EUC-JP");
1553 case XML_CHAR_ENCODING_ASCII:
1554 return(NULL);
1555 }
1556 return(NULL);
1557}
1558
Daniel Veillard97ac1312001-05-30 19:14:17 +00001559/************************************************************************
1560 * *
1561 * Char encoding handlers *
1562 * *
1563 ************************************************************************/
1564
Owen Taylor3473f882001-02-23 17:55:21 +00001565
/*
 * Table of the registered encoding handlers, allocated with room for
 * MAX_ENCODING_HANDLERS entries by xmlInitCharEncodingHandlers();
 * nbCharEncodingHandler is the number of slots in use.
 * the size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1577
1578/**
1579 * xmlNewCharEncodingHandler:
1580 * @name: the encoding name, in UTF-8 format (ASCII actually)
1581 * @input: the xmlCharEncodingInputFunc to read that encoding
1582 * @output: the xmlCharEncodingOutputFunc to write that encoding
1583 *
1584 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001585 *
Owen Taylor3473f882001-02-23 17:55:21 +00001586 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1587 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001588xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001589xmlNewCharEncodingHandler(const char *name,
1590 xmlCharEncodingInputFunc input,
1591 xmlCharEncodingOutputFunc output) {
1592 xmlCharEncodingHandlerPtr handler;
1593 const char *alias;
1594 char upper[500];
1595 int i;
1596 char *up = 0;
1597
1598 /*
1599 * Do the alias resolution
1600 */
1601 alias = xmlGetEncodingAlias(name);
1602 if (alias != NULL)
1603 name = alias;
1604
1605 /*
1606 * Keep only the uppercase version of the encoding.
1607 */
1608 if (name == NULL) {
1609 xmlGenericError(xmlGenericErrorContext,
1610 "xmlNewCharEncodingHandler : no name !\n");
1611 return(NULL);
1612 }
1613 for (i = 0;i < 499;i++) {
1614 upper[i] = toupper(name[i]);
1615 if (upper[i] == 0) break;
1616 }
1617 upper[i] = 0;
1618 up = xmlMemStrdup(upper);
1619 if (up == NULL) {
1620 xmlGenericError(xmlGenericErrorContext,
1621 "xmlNewCharEncodingHandler : out of memory !\n");
1622 return(NULL);
1623 }
1624
1625 /*
1626 * allocate and fill-up an handler block.
1627 */
1628 handler = (xmlCharEncodingHandlerPtr)
1629 xmlMalloc(sizeof(xmlCharEncodingHandler));
1630 if (handler == NULL) {
1631 xmlGenericError(xmlGenericErrorContext,
1632 "xmlNewCharEncodingHandler : out of memory !\n");
1633 return(NULL);
1634 }
1635 handler->input = input;
1636 handler->output = output;
1637 handler->name = up;
1638
1639#ifdef LIBXML_ICONV_ENABLED
1640 handler->iconv_in = NULL;
1641 handler->iconv_out = NULL;
1642#endif /* LIBXML_ICONV_ENABLED */
1643
1644 /*
1645 * registers and returns the handler.
1646 */
1647 xmlRegisterCharEncodingHandler(handler);
1648#ifdef DEBUG_ENCODING
1649 xmlGenericError(xmlGenericErrorContext,
1650 "Registered encoding handler for %s\n", name);
1651#endif
1652 return(handler);
1653}
1654
1655/**
1656 * xmlInitCharEncodingHandlers:
1657 *
1658 * Initialize the char encoding support, it registers the default
1659 * encoding supported.
1660 * NOTE: while public, this function usually doesn't need to be called
1661 * in normal processing.
1662 */
1663void
1664xmlInitCharEncodingHandlers(void) {
1665 unsigned short int tst = 0x1234;
1666 unsigned char *ptr = (unsigned char *) &tst;
1667
1668 if (handlers != NULL) return;
1669
1670 handlers = (xmlCharEncodingHandlerPtr *)
1671 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1672
1673 if (*ptr == 0x12) xmlLittleEndian = 0;
1674 else if (*ptr == 0x34) xmlLittleEndian = 1;
1675 else xmlGenericError(xmlGenericErrorContext,
1676 "Odd problem at endianness detection\n");
1677
1678 if (handlers == NULL) {
1679 xmlGenericError(xmlGenericErrorContext,
1680 "xmlInitCharEncodingHandlers : out of memory !\n");
1681 return;
1682 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001683 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001684#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001685 xmlUTF16LEHandler =
1686 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1687 xmlUTF16BEHandler =
1688 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1689 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1690 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001691 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001692#ifdef LIBXML_HTML_ENABLED
1693 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1694#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001695#else
1696 xmlUTF16LEHandler =
1697 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1698 xmlUTF16BEHandler =
1699 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1700 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1701 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1702 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1703#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001704#ifndef LIBXML_ICONV_ENABLED
1705#ifdef LIBXML_ISO8859X_ENABLED
1706 xmlRegisterCharEncodingHandlersISO8859x ();
1707#endif
1708#endif
1709
Owen Taylor3473f882001-02-23 17:55:21 +00001710}
1711
1712/**
1713 * xmlCleanupCharEncodingHandlers:
1714 *
1715 * Cleanup the memory allocated for the char encoding support, it
1716 * unregisters all the encoding handlers and the aliases.
1717 */
1718void
1719xmlCleanupCharEncodingHandlers(void) {
1720 xmlCleanupEncodingAliases();
1721
1722 if (handlers == NULL) return;
1723
1724 for (;nbCharEncodingHandler > 0;) {
1725 nbCharEncodingHandler--;
1726 if (handlers[nbCharEncodingHandler] != NULL) {
1727 if (handlers[nbCharEncodingHandler]->name != NULL)
1728 xmlFree(handlers[nbCharEncodingHandler]->name);
1729 xmlFree(handlers[nbCharEncodingHandler]);
1730 }
1731 }
1732 xmlFree(handlers);
1733 handlers = NULL;
1734 nbCharEncodingHandler = 0;
1735 xmlDefaultCharEncodingHandler = NULL;
1736}
1737
1738/**
1739 * xmlRegisterCharEncodingHandler:
1740 * @handler: the xmlCharEncodingHandlerPtr handler block
1741 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001742 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001743 */
1744void
1745xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1746 if (handlers == NULL) xmlInitCharEncodingHandlers();
1747 if (handler == NULL) {
1748 xmlGenericError(xmlGenericErrorContext,
1749 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1750 return;
1751 }
1752
1753 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1754 xmlGenericError(xmlGenericErrorContext,
1755 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1756 xmlGenericError(xmlGenericErrorContext,
1757 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1758 return;
1759 }
1760 handlers[nbCharEncodingHandler++] = handler;
1761}
1762
1763/**
1764 * xmlGetCharEncodingHandler:
1765 * @enc: an xmlCharEncoding value.
1766 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001767 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001768 *
1769 * Returns the handler or NULL if not found
1770 */
1771xmlCharEncodingHandlerPtr
1772xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1773 xmlCharEncodingHandlerPtr handler;
1774
1775 if (handlers == NULL) xmlInitCharEncodingHandlers();
1776 switch (enc) {
1777 case XML_CHAR_ENCODING_ERROR:
1778 return(NULL);
1779 case XML_CHAR_ENCODING_NONE:
1780 return(NULL);
1781 case XML_CHAR_ENCODING_UTF8:
1782 return(NULL);
1783 case XML_CHAR_ENCODING_UTF16LE:
1784 return(xmlUTF16LEHandler);
1785 case XML_CHAR_ENCODING_UTF16BE:
1786 return(xmlUTF16BEHandler);
1787 case XML_CHAR_ENCODING_EBCDIC:
1788 handler = xmlFindCharEncodingHandler("EBCDIC");
1789 if (handler != NULL) return(handler);
1790 handler = xmlFindCharEncodingHandler("ebcdic");
1791 if (handler != NULL) return(handler);
1792 break;
1793 case XML_CHAR_ENCODING_UCS4BE:
1794 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1795 if (handler != NULL) return(handler);
1796 handler = xmlFindCharEncodingHandler("UCS-4");
1797 if (handler != NULL) return(handler);
1798 handler = xmlFindCharEncodingHandler("UCS4");
1799 if (handler != NULL) return(handler);
1800 break;
1801 case XML_CHAR_ENCODING_UCS4LE:
1802 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1803 if (handler != NULL) return(handler);
1804 handler = xmlFindCharEncodingHandler("UCS-4");
1805 if (handler != NULL) return(handler);
1806 handler = xmlFindCharEncodingHandler("UCS4");
1807 if (handler != NULL) return(handler);
1808 break;
1809 case XML_CHAR_ENCODING_UCS4_2143:
1810 break;
1811 case XML_CHAR_ENCODING_UCS4_3412:
1812 break;
1813 case XML_CHAR_ENCODING_UCS2:
1814 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1815 if (handler != NULL) return(handler);
1816 handler = xmlFindCharEncodingHandler("UCS-2");
1817 if (handler != NULL) return(handler);
1818 handler = xmlFindCharEncodingHandler("UCS2");
1819 if (handler != NULL) return(handler);
1820 break;
1821
1822 /*
1823 * We used to keep ISO Latin encodings native in the
1824 * generated data. This led to so many problems that
1825 * this has been removed. One can still change this
1826 * back by registering no-ops encoders for those
1827 */
1828 case XML_CHAR_ENCODING_8859_1:
1829 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1830 if (handler != NULL) return(handler);
1831 break;
1832 case XML_CHAR_ENCODING_8859_2:
1833 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1834 if (handler != NULL) return(handler);
1835 break;
1836 case XML_CHAR_ENCODING_8859_3:
1837 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1838 if (handler != NULL) return(handler);
1839 break;
1840 case XML_CHAR_ENCODING_8859_4:
1841 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1842 if (handler != NULL) return(handler);
1843 break;
1844 case XML_CHAR_ENCODING_8859_5:
1845 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1846 if (handler != NULL) return(handler);
1847 break;
1848 case XML_CHAR_ENCODING_8859_6:
1849 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1850 if (handler != NULL) return(handler);
1851 break;
1852 case XML_CHAR_ENCODING_8859_7:
1853 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1854 if (handler != NULL) return(handler);
1855 break;
1856 case XML_CHAR_ENCODING_8859_8:
1857 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1858 if (handler != NULL) return(handler);
1859 break;
1860 case XML_CHAR_ENCODING_8859_9:
1861 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1862 if (handler != NULL) return(handler);
1863 break;
1864
1865
1866 case XML_CHAR_ENCODING_2022_JP:
1867 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1868 if (handler != NULL) return(handler);
1869 break;
1870 case XML_CHAR_ENCODING_SHIFT_JIS:
1871 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1872 if (handler != NULL) return(handler);
1873 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1874 if (handler != NULL) return(handler);
1875 handler = xmlFindCharEncodingHandler("Shift_JIS");
1876 if (handler != NULL) return(handler);
1877 break;
1878 case XML_CHAR_ENCODING_EUC_JP:
1879 handler = xmlFindCharEncodingHandler("EUC-JP");
1880 if (handler != NULL) return(handler);
1881 break;
1882 default:
1883 break;
1884 }
1885
1886#ifdef DEBUG_ENCODING
1887 xmlGenericError(xmlGenericErrorContext,
1888 "No handler found for encoding %d\n", enc);
1889#endif
1890 return(NULL);
1891}
1892
1893/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001894 * xmlFindCharEncodingHandler:
1895 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001896 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001897 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001898 *
1899 * Returns the handler or NULL if not found
1900 */
1901xmlCharEncodingHandlerPtr
1902xmlFindCharEncodingHandler(const char *name) {
1903 const char *nalias;
1904 const char *norig;
1905 xmlCharEncoding alias;
1906#ifdef LIBXML_ICONV_ENABLED
1907 xmlCharEncodingHandlerPtr enc;
1908 iconv_t icv_in, icv_out;
1909#endif /* LIBXML_ICONV_ENABLED */
1910 char upper[100];
1911 int i;
1912
1913 if (handlers == NULL) xmlInitCharEncodingHandlers();
1914 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1915 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1916
1917 /*
1918 * Do the alias resolution
1919 */
1920 norig = name;
1921 nalias = xmlGetEncodingAlias(name);
1922 if (nalias != NULL)
1923 name = nalias;
1924
1925 /*
1926 * Check first for directly registered encoding names
1927 */
1928 for (i = 0;i < 99;i++) {
1929 upper[i] = toupper(name[i]);
1930 if (upper[i] == 0) break;
1931 }
1932 upper[i] = 0;
1933
1934 for (i = 0;i < nbCharEncodingHandler; i++)
1935 if (!strcmp(upper, handlers[i]->name)) {
1936#ifdef DEBUG_ENCODING
1937 xmlGenericError(xmlGenericErrorContext,
1938 "Found registered handler for encoding %s\n", name);
1939#endif
1940 return(handlers[i]);
1941 }
1942
1943#ifdef LIBXML_ICONV_ENABLED
1944 /* check whether iconv can handle this */
1945 icv_in = iconv_open("UTF-8", name);
1946 icv_out = iconv_open(name, "UTF-8");
1947 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1948 enc = (xmlCharEncodingHandlerPtr)
1949 xmlMalloc(sizeof(xmlCharEncodingHandler));
1950 if (enc == NULL) {
1951 iconv_close(icv_in);
1952 iconv_close(icv_out);
1953 return(NULL);
1954 }
1955 enc->name = xmlMemStrdup(name);
1956 enc->input = NULL;
1957 enc->output = NULL;
1958 enc->iconv_in = icv_in;
1959 enc->iconv_out = icv_out;
1960#ifdef DEBUG_ENCODING
1961 xmlGenericError(xmlGenericErrorContext,
1962 "Found iconv handler for encoding %s\n", name);
1963#endif
1964 return enc;
1965 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1966 xmlGenericError(xmlGenericErrorContext,
1967 "iconv : problems with filters for '%s'\n", name);
1968 }
1969#endif /* LIBXML_ICONV_ENABLED */
1970
1971#ifdef DEBUG_ENCODING
1972 xmlGenericError(xmlGenericErrorContext,
1973 "No handler found for encoding %s\n", name);
1974#endif
1975
1976 /*
1977 * Fallback using the canonical names
1978 */
1979 alias = xmlParseCharEncoding(norig);
1980 if (alias != XML_CHAR_ENCODING_ERROR) {
1981 const char* canon;
1982 canon = xmlGetCharEncodingName(alias);
1983 if ((canon != NULL) && (strcmp(name, canon))) {
1984 return(xmlFindCharEncodingHandler(canon));
1985 }
1986 }
1987
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00001988 /*
Daniel Veillardd0c9c322003-10-10 00:49:42 +00001989 * If nothing was found and it is "UTF-16" then use the Little endian
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00001990 * version.
1991 */
1992 if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) ||
1993 (xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16")))
1994 return(xmlUTF16LEHandler);
1995
Owen Taylor3473f882001-02-23 17:55:21 +00001996 return(NULL);
1997}
1998
Daniel Veillard97ac1312001-05-30 19:14:17 +00001999/************************************************************************
2000 * *
2001 * ICONV based generic conversion functions *
2002 * *
2003 ************************************************************************/
2004
Owen Taylor3473f882001-02-23 17:55:21 +00002005#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call and translates its outcome into the
 * return code convention used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG), or
 *     -2 if the transcoding fails (EILSEQ: invalid input sequence or
 *        character which can't be represented in the target encoding), or
 *     -3 if the input ends in the middle of a sequence (EINVAL) or for
 *        any other failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are forced to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    size_t inleft = *inlen;
    size_t outleft = *outlen;
    const char *src = (const char *) in;
    char *dst = (char *) out;
    int rc;

    rc = iconv(cd, (char **) &src, &inleft, &dst, &outleft);

    /* iconv() leaves the remaining counts behind, turn them into used ones */
    if (in == NULL) {
        *inlen = 0;
        *outlen = 0;
    } else {
        *inlen -= inleft;
        *outlen -= outleft;
    }

    /* everything consumed and no error reported: done */
    if ((inleft == 0) && (rc != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (truncated input) and anything unexpected share the same code */
    return -3;
}
2064#endif /* LIBXML_ICONV_ENABLED */
2065
Daniel Veillard97ac1312001-05-30 19:14:17 +00002066/************************************************************************
2067 * *
2068 * The real API used by libxml for on-the-fly conversion *
2069 * *
2070 ************************************************************************/
2071
Owen Taylor3473f882001-02-23 17:55:21 +00002072/**
2073 * xmlCharEncFirstLine:
2074 * @handler: char enconding transformation data structure
2075 * @out: an xmlBuffer for the output.
2076 * @in: an xmlBuffer for the input
2077 *
2078 * Front-end for the encoding handler input function, but handle only
2079 * the very first line, i.e. limit itself to 45 chars.
2080 *
2081 * Returns the number of byte written if success, or
2082 * -1 general error
2083 * -2 if the transcoding fails (for *in is not valid utf8 string or
2084 * the result of transformation can't fit into the encoding we want), or
2085 */
2086int
2087xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2088 xmlBufferPtr in) {
2089 int ret = -2;
2090 int written;
2091 int toconv;
2092
2093 if (handler == NULL) return(-1);
2094 if (out == NULL) return(-1);
2095 if (in == NULL) return(-1);
2096
2097 written = out->size - out->use;
2098 toconv = in->use;
2099 if (toconv * 2 >= written) {
2100 xmlBufferGrow(out, toconv);
2101 written = out->size - out->use - 1;
2102 }
2103
2104 /*
2105 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2106 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002107 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00002108 */
2109 written = 45;
2110
2111 if (handler->input != NULL) {
2112 ret = handler->input(&out->content[out->use], &written,
2113 in->content, &toconv);
2114 xmlBufferShrink(in, toconv);
2115 out->use += written;
2116 out->content[out->use] = 0;
2117 }
2118#ifdef LIBXML_ICONV_ENABLED
2119 else if (handler->iconv_in != NULL) {
2120 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2121 &written, in->content, &toconv);
2122 xmlBufferShrink(in, toconv);
2123 out->use += written;
2124 out->content[out->use] = 0;
2125 if (ret == -1) ret = -3;
2126 }
2127#endif /* LIBXML_ICONV_ENABLED */
2128#ifdef DEBUG_ENCODING
2129 switch (ret) {
2130 case 0:
2131 xmlGenericError(xmlGenericErrorContext,
2132 "converted %d bytes to %d bytes of input\n",
2133 toconv, written);
2134 break;
2135 case -1:
2136 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2137 toconv, written, in->use);
2138 break;
2139 case -2:
2140 xmlGenericError(xmlGenericErrorContext,
2141 "input conversion failed due to input error\n");
2142 break;
2143 case -3:
2144 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2145 toconv, written, in->use);
2146 break;
2147 default:
2148 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2149 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002150#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002151 /*
2152 * Ignore when input buffer is not on a boundary
2153 */
2154 if (ret == -3) ret = 0;
2155 if (ret == -1) ret = 0;
2156 return(ret);
2157}
2158
2159/**
2160 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002161 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002162 * @out: an xmlBuffer for the output.
2163 * @in: an xmlBuffer for the input
2164 *
2165 * Generic front-end for the encoding handler input function
2166 *
2167 * Returns the number of byte written if success, or
2168 * -1 general error
2169 * -2 if the transcoding fails (for *in is not valid utf8 string or
2170 * the result of transformation can't fit into the encoding we want), or
2171 */
2172int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002173xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2174 xmlBufferPtr in)
2175{
Owen Taylor3473f882001-02-23 17:55:21 +00002176 int ret = -2;
2177 int written;
2178 int toconv;
2179
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002180 if (handler == NULL)
2181 return (-1);
2182 if (out == NULL)
2183 return (-1);
2184 if (in == NULL)
2185 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002186
2187 toconv = in->use;
2188 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002189 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00002190 written = out->size - out->use;
2191 if (toconv * 2 >= written) {
2192 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002193 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002194 }
2195 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002196 ret = handler->input(&out->content[out->use], &written,
2197 in->content, &toconv);
2198 xmlBufferShrink(in, toconv);
2199 out->use += written;
2200 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002201 }
2202#ifdef LIBXML_ICONV_ENABLED
2203 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002204 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2205 &written, in->content, &toconv);
2206 xmlBufferShrink(in, toconv);
2207 out->use += written;
2208 out->content[out->use] = 0;
2209 if (ret == -1)
2210 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002211 }
2212#endif /* LIBXML_ICONV_ENABLED */
2213 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002214 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002215#ifdef DEBUG_ENCODING
2216 xmlGenericError(xmlGenericErrorContext,
2217 "converted %d bytes to %d bytes of input\n",
2218 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002219#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002220 break;
2221 case -1:
2222#ifdef DEBUG_ENCODING
2223 xmlGenericError(xmlGenericErrorContext,
2224 "converted %d bytes to %d bytes of input, %d left\n",
2225 toconv, written, in->use);
2226#endif
2227 break;
2228 case -3:
2229#ifdef DEBUG_ENCODING
2230 xmlGenericError(xmlGenericErrorContext,
2231 "converted %d bytes to %d bytes of input, %d left\n",
2232 toconv, written, in->use);
2233#endif
2234 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002235 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002236 xmlGenericError(xmlGenericErrorContext,
2237 "input conversion failed due to input error\n");
2238 xmlGenericError(xmlGenericErrorContext,
2239 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2240 in->content[0], in->content[1],
2241 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 }
2243 /*
2244 * Ignore when input buffer is not on a boundary
2245 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002246 if (ret == -3)
2247 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00002248 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00002249}
2250
2251/**
2252 * xmlCharEncOutFunc:
2253 * @handler: char enconding transformation data structure
2254 * @out: an xmlBuffer for the output.
2255 * @in: an xmlBuffer for the input
2256 *
2257 * Generic front-end for the encoding handler output function
2258 * a first call with @in == NULL has to be made firs to initiate the
2259 * output in case of non-stateless encoding needing to initiate their
2260 * state or the output (like the BOM in UTF16).
2261 * In case of UTF8 sequence conversion errors for the given encoder,
2262 * the content will be automatically remapped to a CharRef sequence.
2263 *
2264 * Returns the number of byte written if success, or
2265 * -1 general error
2266 * -2 if the transcoding fails (for *in is not valid utf8 string or
2267 * the result of transformation can't fit into the encoding we want), or
2268 */
2269int
2270xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2271 xmlBufferPtr in) {
2272 int ret = -2;
2273 int written;
2274 int writtentot = 0;
2275 int toconv;
2276 int output = 0;
2277
2278 if (handler == NULL) return(-1);
2279 if (out == NULL) return(-1);
2280
2281retry:
2282
2283 written = out->size - out->use;
2284
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002285 if (written > 0)
2286 written--; /* Gennady: count '/0' */
2287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 /*
2289 * First specific handling of in = NULL, i.e. the initialization call
2290 */
2291 if (in == NULL) {
2292 toconv = 0;
2293 if (handler->output != NULL) {
2294 ret = handler->output(&out->content[out->use], &written,
2295 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002296 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002297 out->use += written;
2298 out->content[out->use] = 0;
2299 }
Owen Taylor3473f882001-02-23 17:55:21 +00002300 }
2301#ifdef LIBXML_ICONV_ENABLED
2302 else if (handler->iconv_out != NULL) {
2303 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2304 &written, NULL, &toconv);
2305 out->use += written;
2306 out->content[out->use] = 0;
2307 }
2308#endif /* LIBXML_ICONV_ENABLED */
2309#ifdef DEBUG_ENCODING
2310 xmlGenericError(xmlGenericErrorContext,
2311 "initialized encoder\n");
2312#endif
2313 return(0);
2314 }
2315
2316 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002317 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002318 */
2319 toconv = in->use;
2320 if (toconv == 0)
2321 return(0);
2322 if (toconv * 2 >= written) {
2323 xmlBufferGrow(out, toconv * 2);
2324 written = out->size - out->use - 1;
2325 }
2326 if (handler->output != NULL) {
2327 ret = handler->output(&out->content[out->use], &written,
2328 in->content, &toconv);
2329 xmlBufferShrink(in, toconv);
2330 out->use += written;
2331 writtentot += written;
2332 out->content[out->use] = 0;
2333 }
2334#ifdef LIBXML_ICONV_ENABLED
2335 else if (handler->iconv_out != NULL) {
2336 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2337 &written, in->content, &toconv);
2338 xmlBufferShrink(in, toconv);
2339 out->use += written;
2340 writtentot += written;
2341 out->content[out->use] = 0;
2342 if (ret == -1) {
2343 if (written > 0) {
2344 /*
2345 * Can be a limitation of iconv
2346 */
2347 goto retry;
2348 }
2349 ret = -3;
2350 }
2351 }
2352#endif /* LIBXML_ICONV_ENABLED */
2353 else {
2354 xmlGenericError(xmlGenericErrorContext,
2355 "xmlCharEncOutFunc: no output function !\n");
2356 return(-1);
2357 }
2358
2359 if (ret >= 0) output += ret;
2360
2361 /*
2362 * Attempt to handle error cases
2363 */
2364 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002365 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002366#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002367 xmlGenericError(xmlGenericErrorContext,
2368 "converted %d bytes to %d bytes of output\n",
2369 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002370#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002371 break;
2372 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002373#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002374 xmlGenericError(xmlGenericErrorContext,
2375 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002376#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002377 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002378 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002379#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002380 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2381 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002382#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002383 break;
2384 case -2: {
2385 int len = in->use;
2386 const xmlChar *utf = (const xmlChar *) in->content;
2387 int cur;
2388
2389 cur = xmlGetUTF8Char(utf, &len);
2390 if (cur > 0) {
2391 xmlChar charref[20];
2392
2393#ifdef DEBUG_ENCODING
2394 xmlGenericError(xmlGenericErrorContext,
2395 "handling output conversion error\n");
2396 xmlGenericError(xmlGenericErrorContext,
2397 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2398 in->content[0], in->content[1],
2399 in->content[2], in->content[3]);
2400#endif
2401 /*
2402 * Removes the UTF8 sequence, and replace it by a charref
2403 * and continue the transcoding phase, hoping the error
2404 * did not mangle the encoder state.
2405 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002406 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002407 xmlBufferShrink(in, len);
2408 xmlBufferAddHead(in, charref, -1);
2409
2410 goto retry;
2411 } else {
2412 xmlGenericError(xmlGenericErrorContext,
2413 "output conversion failed due to conv error\n");
2414 xmlGenericError(xmlGenericErrorContext,
2415 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2416 in->content[0], in->content[1],
2417 in->content[2], in->content[3]);
2418 in->content[0] = ' ';
2419 }
2420 break;
2421 }
2422 }
2423 return(ret);
2424}
2425
2426/**
2427 * xmlCharEncCloseFunc:
2428 * @handler: char enconding transformation data structure
2429 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002430 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002431 *
2432 * Returns 0 if success, or -1 in case of error
2433 */
2434int
2435xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2436 int ret = 0;
2437 if (handler == NULL) return(-1);
2438 if (handler->name == NULL) return(-1);
2439#ifdef LIBXML_ICONV_ENABLED
2440 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002441 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002442 * and the associated icon resources.
2443 */
2444 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2445 if (handler->name != NULL)
2446 xmlFree(handler->name);
2447 handler->name = NULL;
2448 if (handler->iconv_out != NULL) {
2449 if (iconv_close(handler->iconv_out))
2450 ret = -1;
2451 handler->iconv_out = NULL;
2452 }
2453 if (handler->iconv_in != NULL) {
2454 if (iconv_close(handler->iconv_in))
2455 ret = -1;
2456 handler->iconv_in = NULL;
2457 }
2458 xmlFree(handler);
2459 }
2460#endif /* LIBXML_ICONV_ENABLED */
2461#ifdef DEBUG_ENCODING
2462 if (ret)
2463 xmlGenericError(xmlGenericErrorContext,
2464 "failed to close the encoding handler\n");
2465 else
2466 xmlGenericError(xmlGenericErrorContext,
2467 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002468#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002469
Owen Taylor3473f882001-02-23 17:55:21 +00002470 return(ret);
2471}
2472
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002473#ifndef LIBXML_ICONV_ENABLED
2474#ifdef LIBXML_ISO8859X_ENABLED
2475
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops silently when @out is full,
 * the caller can detect it from the value of @inlen.
 * A call with @in == NULL is the initialization call, nothing is done.
 *
 * @xlattable starts with a 48 bytes index (32 entries selected by the low
 * bits of a 2 bytes lead, 16 entries selected by the low bits of a 3 bytes
 * lead) followed by blocks of 64 bytes selected by the low 6 bits of a
 * trailing byte; a 0 result means the char has no mapping.
 *
 * Returns 0 if success,
 *     -2 if the transcoding fails (invalid UTF-8, or char which is not
 *        part of the target set),
 *     -3 if the input ends in the middle of a multibyte sequence
 * The value of @inlen after return is the number of octets consumed,
 * it always stops on the boundary of the first sequence not converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* seqstart; /* start of the sequence being decoded */
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    seqstart = in;

    /* every input sequence yields exactly one byte, check the room first */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (in >= inend) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        /* the whole sequence was converted, it is now consumed */
        seqstart = in;
    }
    *outlen = out - outstart;
    *inlen = seqstart - instart;
    return(ret);
}
2588
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code point of each char in the range 0x80-0xFF,
 *                 indexed by char - 0x80, 0 for an undefined char
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops silently in front of the first
 * char whose UTF-8 form does not fit in the remaining space of @out, the
 * caller can detect it from the value of @inlen.
 * A NULL @in is handled as an empty input.
 *
 * Returns 0 if success, or -1 if the input contains an undefined char
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return (0);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        /* only read inside [instart, inend) */
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                /* needs 2 bytes */
                if (outend - out < 2)
                    break;
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* needs 3 bytes, table entries are 16 bits wide */
                if (outend - out < 3)
                    break;
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (0);
}
2648
2649
2650/************************************************************************
2651 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2652 ************************************************************************/
2653
/*
 * ISO-8859-2 to Unicode table: one 16 bits code point per char of the
 * range 0x80-0xFF. ISO8859xToUTF8() reads a table of this shape with the
 * index (char - 0x80) and handles a 0 entry as an undefined char.
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is outside of this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2672
/*
 * Unicode to ISO-8859-2 table in the 2-level layout read by
 * UTF8ToISO8859x() through its xlattable argument: a 48 bytes index
 * (first 3 rows) followed by 6 blocks of 64 bytes (4 rows each).
 * Index entries 0-31 give the block for a 2 bytes UTF-8 lead, entries
 * 32-47 the block for a 3 bytes lead; a block is indexed by the low
 * 6 bits of a trailing byte and 0 means "no mapping".
 * The string holds exactly 48 + 6 * 64 chars, so the array has no room
 * for, and does not store, the implicit terminating 0.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is outside of this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2702
/*
 * ISO-8859-3 to Unicode table: one 16 bits code point per char of the
 * range 0x80-0xFF. ISO8859xToUTF8() reads a table of this shape with the
 * index (char - 0x80) and handles a 0 entry as an undefined char; this
 * table has several such entries (e.g. for 0xa5 and 0xae).
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is outside of this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2721
/*
 * Unicode to ISO-8859-3 table in the 2-level layout read by
 * UTF8ToISO8859x() through its xlattable argument: a 48 bytes index
 * (first 3 rows) followed by 7 blocks of 64 bytes (4 rows each).
 * Index entries 0-31 give the block for a 2 bytes UTF-8 lead, entries
 * 32-47 the block for a 3 bytes lead; a block is indexed by the low
 * 6 bits of a trailing byte and 0 means "no mapping".
 * The string holds exactly 48 + 7 * 64 chars, so the array has no room
 * for, and does not store, the implicit terminating 0.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is outside of this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2755
/*
 * ISO-8859-4 to Unicode table: one 16 bits code point per char of the
 * range 0x80-0xFF. ISO8859xToUTF8() reads a table of this shape with the
 * index (char - 0x80) and handles a 0 entry as an undefined char.
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is outside of this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2774
/*
 * Unicode to ISO-8859-4 table in the 2-level layout read by
 * UTF8ToISO8859x() through its xlattable argument: a 48 bytes index
 * (first 3 rows) followed by 6 blocks of 64 bytes (4 rows each).
 * Index entries 0-31 give the block for a 2 bytes UTF-8 lead, entries
 * 32-47 the block for a 3 bytes lead; a block is indexed by the low
 * 6 bits of a trailing byte and 0 means "no mapping".
 * The string holds exactly 48 + 6 * 64 chars, so the array has no room
 * for, and does not store, the implicit terminating 0.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is outside of this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2804
/*
 * xmlunicodetable_ISO8859_5: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-5 byte 0x80 + i.
 * NOTE(review): the code that indexes this table is outside this chunk;
 * confirm the indexing convention there.
 */
2805 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2806 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2807 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2808 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2809 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2810 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2811 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2812 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2813 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2814 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2815 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2816 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2817 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2818 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2819 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2820 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2821 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2822};
2823
/*
 * xmltranscodetable_ISO8859_5: 48 + 6 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 6 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-5)
 * companion of xmlunicodetable_ISO8859_5; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
2824 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2825 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2826 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2830 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2833 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2834 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2835 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2837 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2838 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2839 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2840 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2841 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2842 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2843 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2844 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852};
2853
/*
 * xmlunicodetable_ISO8859_6: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-6 byte 0x80 + i.
 * Entries of 0x0000 presumably mark bytes with no assigned character
 * -- TODO confirm against the code that indexes this table (outside this
 * chunk).
 */
2854 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2855 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2856 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2857 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2858 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2859 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2860 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2861 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2862 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2863 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2864 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2865 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2866 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2867 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2868 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2869 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2870 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2871};
2872
/*
 * xmltranscodetable_ISO8859_6: 48 + 5 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 5 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-6)
 * companion of xmlunicodetable_ISO8859_6; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
2873 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2874 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2875 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2876 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2882 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2883 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2884 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2885 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2886 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2891 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2892 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2893 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2894 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2895 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897};
2898
/*
 * xmlunicodetable_ISO8859_7: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-7 byte 0x80 + i.
 * Entries of 0x0000 presumably mark bytes with no assigned character
 * -- TODO confirm against the code that indexes this table (outside this
 * chunk).
 */
2899 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2900 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2901 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2902 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2903 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2904 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2905 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2906 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2907 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2908 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2909 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2910 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2911 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2912 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2913 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2914 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2915 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2916};
2917
/*
 * xmltranscodetable_ISO8859_7: 48 + 7 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 7 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-7)
 * companion of xmlunicodetable_ISO8859_7; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
2918 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2919 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2920 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2921 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2922 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2923 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2924 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2925 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2926 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2927 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2928 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2929 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2930 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2931 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2932 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2935 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2943 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2944 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2945 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2946 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2947 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2948 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2950};
2951
/*
 * xmlunicodetable_ISO8859_8: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-8 byte 0x80 + i.
 * Entries of 0x0000 presumably mark bytes with no assigned character
 * -- TODO confirm against the code that indexes this table (outside this
 * chunk).
 */
2952 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2953 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2954 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2955 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2956 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2957 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2958 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2959 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2960 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2961 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2962 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2963 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2964 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2965 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2966 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2967 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2968 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2969};
2970
/*
 * xmltranscodetable_ISO8859_8: 48 + 7 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 7 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-8)
 * companion of xmlunicodetable_ISO8859_8; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
2971 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2972 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2973 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2974 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2980 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2981 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2982 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2983 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2996 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3001 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3002 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3003};
3004
/*
 * xmlunicodetable_ISO8859_9: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-9 byte 0x80 + i.
 * NOTE(review): the code that indexes this table is outside this chunk;
 * confirm the indexing convention there.
 */
3005 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3006 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3007 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3008 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3009 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3010 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3011 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3012 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3013 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3014 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3015 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3016 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3017 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3018 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3019 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3020 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3021 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3022};
3023
/*
 * xmltranscodetable_ISO8859_9: 48 + 5 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 5 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-9)
 * companion of xmlunicodetable_ISO8859_9; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
3024 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3025 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3028 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3029 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3030 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3033 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3034 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3035 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3036 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3037 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3038 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3039 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3040 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3041 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3042 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3046 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048};
3049
/*
 * xmlunicodetable_ISO8859_10: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-10 byte 0x80 + i.
 * NOTE(review): the code that indexes this table is outside this chunk;
 * confirm the indexing convention there.
 */
3050 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3051 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3052 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3053 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3054 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3055 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3056 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3057 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3058 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3059 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3060 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3061 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3062 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3063 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3064 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3065 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3066 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3067};
3068
/*
 * xmltranscodetable_ISO8859_10: 48 + 7 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 7 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-10)
 * companion of xmlunicodetable_ISO8859_10; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
3069 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3070 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3078 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3079 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3080 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3081 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3082 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3083 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3084 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3085 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3086 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3088 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3089 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3098 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3099 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3100 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3101};
3102
/*
 * xmlunicodetable_ISO8859_11: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-11 byte 0x80 + i.
 * Entries of 0x0000 presumably mark bytes with no assigned character
 * -- TODO confirm against the code that indexes this table (outside this
 * chunk).
 */
3103 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3104 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3105 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3106 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3107 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3108 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3109 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3110 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3111 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3112 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3113 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3114 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3115 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3116 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3117 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3118 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3119 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3120};
3121
/*
 * xmltranscodetable_ISO8859_11: 48 + 6 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 6 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-11)
 * companion of xmlunicodetable_ISO8859_11; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
3122 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3123 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3131 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3132 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3138 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3139 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3140 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3141 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3142 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3147 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3148 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150};
3151
/*
 * xmlunicodetable_ISO8859_13: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-13 byte 0x80 + i.
 * NOTE(review): the code that indexes this table is outside this chunk;
 * confirm the indexing convention there.
 */
3152 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3153 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3154 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3155 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3156 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3157 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3158 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3159 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3160 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3161 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3162 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3163 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3164 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3165 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3166 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3167 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3168 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3169};
3170
/*
 * xmltranscodetable_ISO8859_13: 48 + 7 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 7 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-13)
 * companion of xmlunicodetable_ISO8859_13; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
3171 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3172 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3173 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3176 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3177 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3180 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3181 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3182 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3183 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3189 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3192 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3193 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3194 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3195 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3197 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3198 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3199 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3200 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3201 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3202 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3203};
3204
/*
 * xmlunicodetable_ISO8859_14: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-14 byte 0x80 + i.
 * NOTE(review): the code that indexes this table is outside this chunk;
 * confirm the indexing convention there.
 */
3205 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3206 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3207 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3208 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3209 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3210 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3211 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3212 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3213 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3214 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3215 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3216 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3217 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3218 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3219 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3220 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3221 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3222};
3223
/*
 * xmltranscodetable_ISO8859_14: 48 + 10 * 64 bytes of lookup data.
 * The string literal supplies exactly that many bytes, so the array has
 * no room for (and does not store) a terminating NUL.
 * The size expression and the data suggest a 48-byte index area followed
 * by 10 blocks of 64 bytes: small values in the index area look like block
 * numbers, and the blocks hold mostly 0x00 or byte values in 0x80..0xff.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-14)
 * companion of xmlunicodetable_ISO8859_14; the code that indexes this table
 * is not visible in this chunk, so confirm the exact lookup scheme there.
 */
3224 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3225 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3233 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3234 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3235 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3240 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3249 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3260 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3265 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3266 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3267 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3268};
3269
/*
 * xmlunicodetable_ISO8859_15: 128 16-bit entries.
 * The first 32 entries are the identity values 0x0080..0x009f, which
 * suggests entry i is the Unicode code point for ISO-8859-15 byte 0x80 + i.
 * NOTE(review): the code that indexes this table is outside this chunk;
 * confirm the indexing convention there.
 */
3270 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3271 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3272 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3273 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3274 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3275 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3276 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3277 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3278 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3279 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3280 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3281 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3282 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3283 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3284 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3285 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3286 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3287};
3288
/*
 * xmltranscodetable_ISO8859_15:
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_15().
 * The declared size is 48 + 6 * 64: a 48-byte header followed by six
 * 64-byte blocks, written as 16-byte string rows.  The string literal
 * fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the header bytes look like block numbers (32 entries for
 * 2-byte UTF-8 lead bytes, then 16 for 3-byte lead bytes), block bytes
 * look like they are indexed by the low six bits of a continuation byte,
 * and 0x00 presumably marks "no mapping" -- confirm against
 * UTF8ToISO8859x(), which is defined elsewhere in this file.
 */
static unsigned char const xmltranscodetable_ISO8859_15[48 + 6 * 64] = {
    /* header [0..31]: entries 0x02, 0x03 and 0x05 name blocks 1, 5 and 4 */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* header [32..47]: entry 32 + 0x02 names block 2 */
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: output bytes 0x80..0xbf, with eight slots zeroed */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* block 2: entry 0x02 names block 3 */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3: entry 0x2c holds 0xa4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4: holds 0xbc, 0xbd, 0xa6, 0xa8, 0xbe, 0xb4, 0xb8 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* block 5: output bytes 0xc0..0xff, fully populated */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3318
/*
 * xmlunicodetable_ISO8859_16:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8() by
 * ISO8859_16ToUTF8().  The first 32 entries are the identity values
 * 0x0080..0x009f; many later entries differ from 0x80 + index.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 * The bracketed values in the row comments are array indices.
 */
static unsigned short const xmlunicodetable_ISO8859_16[128] = {
    /* [0x00] */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* [0x08] */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* [0x10] */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* [0x18] */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* [0x20] */ 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    /* [0x28] */ 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    /* [0x30] */ 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    /* [0x38] */ 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    /* [0x40] */ 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    /* [0x48] */ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* [0x50] */ 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    /* [0x58] */ 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    /* [0x60] */ 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    /* [0x68] */ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* [0x70] */ 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    /* [0x78] */ 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3337
/*
 * xmltranscodetable_ISO8859_16:
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_16().
 * The declared size is 48 + 9 * 64: a 48-byte header followed by nine
 * 64-byte blocks, written as 16-byte string rows.  The string literal
 * fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the header bytes look like block numbers (32 entries for
 * 2-byte UTF-8 lead bytes, then 16 for 3-byte lead bytes), block bytes
 * look like they are indexed by the low six bits of a continuation byte,
 * and 0x00 presumably marks "no mapping" -- confirm against
 * UTF8ToISO8859x(), which is defined elsewhere in this file.
 */
static unsigned char const xmltranscodetable_ISO8859_16[48 + 9 * 64] = {
    /* header [0..31]: entries 0x02, 0x03, 0x04, 0x05, 0x08 name blocks 1, 8, 2, 3, 7 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* header [32..47]: entry 32 + 0x02 names block 4 */
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: output bytes 0x80..0xbf, sparsely populated above 0x9f */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* block 4: entries 0x00 and 0x02 name blocks 6 and 5 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5: entry 0x2c holds 0xa4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6: entries 0x1d and 0x1e hold 0xb5 and 0xa5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7: entries 0x18..0x1b hold 0xaa, 0xba, 0xde, 0xfe */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 8: output bytes 0xc0..0xff, with some slots zeroed */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3379
3380
/*
 * Auto-generated wrapper functions for ISO-8859-2 .. ISO-8859-16
 * (this group defines no wrapper for ISO-8859-12).
 */
3384
3385static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3386 const unsigned char* in, int *inlen) {
3387 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3388}
3389static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3390 const unsigned char* in, int *inlen) {
3391 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3392}
3393
3394static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3395 const unsigned char* in, int *inlen) {
3396 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3397}
3398static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3399 const unsigned char* in, int *inlen) {
3400 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3401}
3402
3403static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3404 const unsigned char* in, int *inlen) {
3405 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3406}
3407static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3408 const unsigned char* in, int *inlen) {
3409 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3410}
3411
3412static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3413 const unsigned char* in, int *inlen) {
3414 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3415}
3416static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3417 const unsigned char* in, int *inlen) {
3418 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3419}
3420
3421static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3422 const unsigned char* in, int *inlen) {
3423 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3424}
3425static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3426 const unsigned char* in, int *inlen) {
3427 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3428}
3429
3430static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3431 const unsigned char* in, int *inlen) {
3432 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3433}
3434static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3435 const unsigned char* in, int *inlen) {
3436 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3437}
3438
3439static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3440 const unsigned char* in, int *inlen) {
3441 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3442}
3443static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3444 const unsigned char* in, int *inlen) {
3445 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3446}
3447
3448static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3449 const unsigned char* in, int *inlen) {
3450 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3451}
3452static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3453 const unsigned char* in, int *inlen) {
3454 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3455}
3456
3457static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3458 const unsigned char* in, int *inlen) {
3459 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3460}
3461static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3462 const unsigned char* in, int *inlen) {
3463 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3464}
3465
3466static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3467 const unsigned char* in, int *inlen) {
3468 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3469}
3470static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3471 const unsigned char* in, int *inlen) {
3472 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3473}
3474
3475static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3476 const unsigned char* in, int *inlen) {
3477 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3478}
3479static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3480 const unsigned char* in, int *inlen) {
3481 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3482}
3483
3484static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3485 const unsigned char* in, int *inlen) {
3486 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3487}
3488static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3489 const unsigned char* in, int *inlen) {
3490 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3491}
3492
3493static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3494 const unsigned char* in, int *inlen) {
3495 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3496}
3497static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3498 const unsigned char* in, int *inlen) {
3499 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3500}
3501
3502static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3503 const unsigned char* in, int *inlen) {
3504 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3505}
3506static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3507 const unsigned char* in, int *inlen) {
3508 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3509}
3510
3511static void
3512xmlRegisterCharEncodingHandlersISO8859x (void) {
3513 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3514 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3515 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3516 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3517 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3518 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3519 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3520 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3521 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3522 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3523 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3524 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3525 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3526 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3527}
3528
3529#endif
3530#endif
3531
3532