blob: 22c47c0344ed3f28e5a3d37290b5809e11ae60b2 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * UTF8 string routines from:
 * "William M. Brack" <wbrack@mmm.com.hk>
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
25
Daniel Veillard34ce8be2002-03-18 19:37:11 +000026#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000027#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000028
Owen Taylor3473f882001-02-23 17:55:21 +000029#include <string.h>
30
31#ifdef HAVE_CTYPE_H
32#include <ctype.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
Owen Taylor3473f882001-02-23 17:55:21 +000037#ifdef LIBXML_ICONV_ENABLED
38#ifdef HAVE_ERRNO_H
39#include <errno.h>
40#endif
41#endif
42#include <libxml/encoding.h>
43#include <libxml/xmlmemory.h>
44#ifdef LIBXML_HTML_ENABLED
45#include <libxml/HTMLparser.h>
46#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000047#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000048#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049
Daniel Veillard22090732001-07-16 00:06:07 +000050static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
51static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000052
53typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
54typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
55struct _xmlCharEncodingAlias {
56 const char *name;
57 const char *alias;
58};
59
60static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
61static int xmlCharEncodingAliasesNb = 0;
62static int xmlCharEncodingAliasesMax = 0;
63
64#ifdef LIBXML_ICONV_ENABLED
65#if 0
66#define DEBUG_ENCODING /* Define this to get encoding traces */
67#endif
68#endif
69
70static int xmlLittleEndian = 1;
71
/************************************************************************
 *									*
 *		Generic UTF8 handling routines				*
 *									*
 * From rfc2044: encoding of the Unicode values on UTF-8:		*
 *									*
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)		*
 * 0000 0000-0000 007F   0xxxxxxx					*
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx				*
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx			*
 * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx		*
 *									*
 * I hope we won't use values > 0xFFFF anytime soon !			*
 * (The routines below nevertheless accept 4-byte sequences.)		*
 *									*
 ************************************************************************/
Owen Taylor3473f882001-02-23 17:55:21 +000086
87/**
William M. Brack4a557d92003-07-29 04:28:04 +000088 * xmlUTF8Size:
89 * @utf: pointer to the UTF8 character
90 *
91 * returns the numbers of bytes in the character, -1 on format error
92 */
93int
94xmlUTF8Size(const xmlChar *utf) {
95 xmlChar mask;
96 int len;
97
98 if (utf == NULL)
99 return -1;
100 if (*utf < 0x80)
101 return 1;
102 /* check valid UTF8 character */
103 if (!(*utf & 0x40))
104 return -1;
105 /* determine number of bytes in char */
106 len = 2;
107 for (mask=0x20; mask != 0; mask>>=1) {
108 if (!(*utf & mask))
109 return len;
110 len++;
111 }
112 return -1;
113}
114
115/**
116 * xmlUTF8Charcmp
117 * @utf1: pointer to first UTF8 char
118 * @utf2: pointer to second UTF8 char
119 *
120 * returns result of comparing the two UCS4 values
121 * as with xmlStrncmp
122 */
123int
124xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
125
126 if (utf1 == NULL ) {
127 if (utf2 == NULL)
128 return 0;
129 return -1;
130 }
Daniel Veillard9ff7de12003-07-29 13:30:42 +0000131 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
William M. Brack4a557d92003-07-29 04:28:04 +0000132}
133
134/**
Daniel Veillarde043ee12001-04-16 14:08:07 +0000135 * xmlUTF8Strlen:
136 * @utf: a sequence of UTF-8 encoded bytes
137 *
Daniel Veillard60087f32001-10-10 09:45:09 +0000138 * compute the length of an UTF8 string, it doesn't do a full UTF8
Daniel Veillarde043ee12001-04-16 14:08:07 +0000139 * checking of the content of the string.
140 *
141 * Returns the number of characters in the string or -1 in case of error
142 */
143int
Daniel Veillard97ac1312001-05-30 19:14:17 +0000144xmlUTF8Strlen(const xmlChar *utf) {
Daniel Veillarde043ee12001-04-16 14:08:07 +0000145 int ret = 0;
146
147 if (utf == NULL)
148 return(-1);
149
150 while (*utf != 0) {
151 if (utf[0] & 0x80) {
152 if ((utf[1] & 0xc0) != 0x80)
153 return(-1);
154 if ((utf[0] & 0xe0) == 0xe0) {
155 if ((utf[2] & 0xc0) != 0x80)
156 return(-1);
157 if ((utf[0] & 0xf0) == 0xf0) {
158 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
159 return(-1);
160 utf += 4;
161 } else {
162 utf += 3;
163 }
164 } else {
165 utf += 2;
166 }
167 } else {
168 utf++;
169 }
170 ret++;
171 }
172 return(ret);
173}
174
175/**
Owen Taylor3473f882001-02-23 17:55:21 +0000176 * xmlGetUTF8Char:
177 * @utf: a sequence of UTF-8 encoded bytes
178 * @len: a pointer to @bytes len
179 *
180 * Read one UTF8 Char from @utf
181 *
182 * Returns the char value or -1 in case of error and update @len with the
183 * number of bytes used
184 */
Daniel Veillardf000f072002-10-22 14:28:17 +0000185int
Owen Taylor3473f882001-02-23 17:55:21 +0000186xmlGetUTF8Char(const unsigned char *utf, int *len) {
187 unsigned int c;
188
189 if (utf == NULL)
190 goto error;
191 if (len == NULL)
192 goto error;
193 if (*len < 1)
194 goto error;
195
196 c = utf[0];
197 if (c & 0x80) {
198 if (*len < 2)
199 goto error;
200 if ((utf[1] & 0xc0) != 0x80)
201 goto error;
202 if ((c & 0xe0) == 0xe0) {
203 if (*len < 3)
204 goto error;
205 if ((utf[2] & 0xc0) != 0x80)
206 goto error;
207 if ((c & 0xf0) == 0xf0) {
208 if (*len < 4)
209 goto error;
210 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
211 goto error;
212 *len = 4;
213 /* 4-byte code */
214 c = (utf[0] & 0x7) << 18;
215 c |= (utf[1] & 0x3f) << 12;
216 c |= (utf[2] & 0x3f) << 6;
217 c |= utf[3] & 0x3f;
218 } else {
219 /* 3-byte code */
220 *len = 3;
221 c = (utf[0] & 0xf) << 12;
222 c |= (utf[1] & 0x3f) << 6;
223 c |= utf[2] & 0x3f;
224 }
225 } else {
226 /* 2-byte code */
227 *len = 2;
228 c = (utf[0] & 0x1f) << 6;
229 c |= utf[1] & 0x3f;
230 }
231 } else {
232 /* 1-byte code */
233 *len = 1;
234 }
235 return(c);
236
237error:
238 *len = 0;
239 return(-1);
240}
241
242/**
Daniel Veillard01c13b52002-12-10 15:19:08 +0000243 * xmlCheckUTF8:
Owen Taylor3473f882001-02-23 17:55:21 +0000244 * @utf: Pointer to putative utf-8 encoded string.
245 *
246 * Checks @utf for being valid utf-8. @utf is assumed to be
247 * null-terminated. This function is not super-strict, as it will
248 * allow longer utf-8 sequences than necessary. Note that Java is
249 * capable of producing these sequences if provoked. Also note, this
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000250 * routine checks for the 4-byte maximum size, but does not check for
Owen Taylor3473f882001-02-23 17:55:21 +0000251 * 0x10ffff maximum value.
252 *
253 * Return value: true if @utf is valid.
254 **/
255int
256xmlCheckUTF8(const unsigned char *utf)
257{
258 int ix;
259 unsigned char c;
260
261 for (ix = 0; (c = utf[ix]);) {
262 if (c & 0x80) {
263 if ((utf[ix + 1] & 0xc0) != 0x80)
264 return(0);
265 if ((c & 0xe0) == 0xe0) {
266 if ((utf[ix + 2] & 0xc0) != 0x80)
267 return(0);
268 if ((c & 0xf0) == 0xf0) {
269 if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
270 return(0);
271 ix += 4;
272 /* 4-byte code */
273 } else
274 /* 3-byte code */
275 ix += 3;
276 } else
277 /* 2-byte code */
278 ix += 2;
279 } else
280 /* 1-byte code */
281 ix++;
282 }
283 return(1);
284}
285
286/**
Daniel Veillard97ac1312001-05-30 19:14:17 +0000287 * xmlUTF8Strsize:
288 * @utf: a sequence of UTF-8 encoded bytes
289 * @len: the number of characters in the array
290 *
291 * storage size of an UTF8 string
292 *
293 * Returns the storage size of
294 * the first 'len' characters of ARRAY
295 *
296 */
297
298int
299xmlUTF8Strsize(const xmlChar *utf, int len) {
300 const xmlChar *ptr=utf;
301 xmlChar ch;
302
303 if (len <= 0)
304 return(0);
305
306 while ( len-- > 0) {
307 if ( !*ptr )
308 break;
309 if ( (ch = *ptr++) & 0x80)
310 while ( (ch<<=1) & 0x80 )
311 ptr++;
312 }
313 return (ptr - utf);
314}
315
316
317/**
318 * xmlUTF8Strndup:
319 * @utf: the input UTF8 *
320 * @len: the len of @utf (in chars)
321 *
322 * a strndup for array of UTF8's
323 *
324 * Returns a new UTF8 * or NULL
325 */
326xmlChar *
327xmlUTF8Strndup(const xmlChar *utf, int len) {
328 xmlChar *ret;
329 int i;
330
331 if ((utf == NULL) || (len < 0)) return(NULL);
332 i = xmlUTF8Strsize(utf, len);
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000333 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
Daniel Veillard97ac1312001-05-30 19:14:17 +0000334 if (ret == NULL) {
335 xmlGenericError(xmlGenericErrorContext,
336 "malloc of %ld byte failed\n",
337 (len + 1) * (long)sizeof(xmlChar));
338 return(NULL);
339 }
340 memcpy(ret, utf, i * sizeof(xmlChar));
341 ret[i] = 0;
342 return(ret);
343}
344
345/**
346 * xmlUTF8Strpos:
347 * @utf: the input UTF8 *
348 * @pos: the position of the desired UTF8 char (in chars)
349 *
350 * a function to provide the equivalent of fetching a
351 * character from a string array
352 *
353 * Returns a pointer to the UTF8 character or NULL
354 */
355xmlChar *
356xmlUTF8Strpos(const xmlChar *utf, int pos) {
357 xmlChar ch;
358
359 if (utf == NULL) return(NULL);
360 if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
361 return(NULL);
362 while (pos--) {
363 if ((ch=*utf++) == 0) return(NULL);
364 if ( ch & 0x80 ) {
365 /* if not simple ascii, verify proper format */
366 if ( (ch & 0xc0) != 0xc0 )
367 return(NULL);
368 /* then skip over remaining bytes for this char */
369 while ( (ch <<= 1) & 0x80 )
370 if ( (*utf++ & 0xc0) != 0x80 )
371 return(NULL);
372 }
373 }
374 return((xmlChar *)utf);
375}
376
377/**
378 * xmlUTF8Strloc:
379 * @utf: the input UTF8 *
380 * @utfchar: the UTF8 character to be found
381 *
382 * a function to provide relative location of a UTF8 char
383 *
384 * Returns the relative character position of the desired char
385 * or -1 if not found
386 */
387int
388xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
389 int i, size;
390 xmlChar ch;
391
392 if (utf==NULL || utfchar==NULL) return -1;
393 size = xmlUTF8Strsize(utfchar, 1);
394 for(i=0; (ch=*utf) != 0; i++) {
395 if (xmlStrncmp(utf, utfchar, size)==0)
396 return(i);
397 utf++;
398 if ( ch & 0x80 ) {
399 /* if not simple ascii, verify proper format */
400 if ( (ch & 0xc0) != 0xc0 )
401 return(-1);
402 /* then skip over remaining bytes for this char */
403 while ( (ch <<= 1) & 0x80 )
404 if ( (*utf++ & 0xc0) != 0x80 )
405 return(-1);
406 }
407 }
408
409 return(-1);
410}
411/**
412 * xmlUTF8Strsub:
413 * @utf: a sequence of UTF-8 encoded bytes
Daniel Veillard97ac1312001-05-30 19:14:17 +0000414 * @start: relative pos of first char
415 * @len: total number to copy
416 *
417 * Note: positions are given in units of UTF-8 chars
418 *
419 * Returns a pointer to a newly created string
420 * or NULL if any problem
421 */
422
423xmlChar *
424xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
425 int i;
426 xmlChar ch;
427
428 if (utf == NULL) return(NULL);
429 if (start < 0) return(NULL);
430 if (len < 0) return(NULL);
431
432 /*
433 * Skip over any leading chars
434 */
435 for (i = 0;i < start;i++) {
436 if ((ch=*utf++) == 0) return(NULL);
437 if ( ch & 0x80 ) {
438 /* if not simple ascii, verify proper format */
439 if ( (ch & 0xc0) != 0xc0 )
440 return(NULL);
441 /* then skip over remaining bytes for this char */
442 while ( (ch <<= 1) & 0x80 )
443 if ( (*utf++ & 0xc0) != 0x80 )
444 return(NULL);
445 }
446 }
447
448 return(xmlUTF8Strndup(utf, len));
449}
450
451/************************************************************************
452 * *
453 * Conversions To/From UTF8 encoding *
454 * *
455 ************************************************************************/
456
457/**
Owen Taylor3473f882001-02-23 17:55:21 +0000458 * asciiToUTF8:
459 * @out: a pointer to an array of bytes to store the result
460 * @outlen: the length of @out
461 * @in: a pointer to an array of ASCII chars
462 * @inlen: the length of @in
463 *
464 * Take a block of ASCII chars in and try to convert it to an UTF-8
465 * block of chars out.
466 * Returns 0 if success, or -1 otherwise
467 * The value of @inlen after return is the number of octets consumed
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000468 * as the return value is positive, else unpredictable.
Owen Taylor3473f882001-02-23 17:55:21 +0000469 * The value of @outlen after return is the number of ocetes consumed.
470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000471static int
Owen Taylor3473f882001-02-23 17:55:21 +0000472asciiToUTF8(unsigned char* out, int *outlen,
473 const unsigned char* in, int *inlen) {
474 unsigned char* outstart = out;
475 const unsigned char* base = in;
476 const unsigned char* processed = in;
477 unsigned char* outend = out + *outlen;
478 const unsigned char* inend;
479 unsigned int c;
480 int bits;
481
482 inend = in + (*inlen);
483 while ((in < inend) && (out - outstart + 5 < *outlen)) {
484 c= *in++;
485
486 /* assertion: c is a single UTF-4 value */
487 if (out >= outend)
488 break;
489 if (c < 0x80) { *out++= c; bits= -6; }
490 else {
491 *outlen = out - outstart;
492 *inlen = processed - base;
493 return(-1);
494 }
495
496 for ( ; bits >= 0; bits-= 6) {
497 if (out >= outend)
498 break;
499 *out++= ((c >> bits) & 0x3F) | 0x80;
500 }
501 processed = (const unsigned char*) in;
502 }
503 *outlen = out - outstart;
504 *inlen = processed - base;
505 return(0);
506}
507
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Conversion stops early, still returning 0, when @out is full or when
 * the input ends in the middle of a multi-byte sequence. A NULL @in is an
 * initialization call that produces and consumes nothing.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character that does not exist in ASCII)
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80)      { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;               /* no chance for this in Ascii */

        /* sequence cut by the end of input: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte inside a sequence is malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c >= 0x80)
            goto invalid;                /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(0);

invalid:
    /* report only the characters fully converted before the failure */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
590
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, bytes from
 * 0x80 up become a 2-byte UTF-8 sequence. Conversion stops as soon as the
 * next character no longer fits in @out; no input byte is read before it
 * is known to lie inside @in.
 *
 * Returns 0 if success, or -1 if any argument is NULL
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        unsigned char c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /* needs two output bytes: 110000xx 10xxxxxx */
            if (outend - out < 2)
                break;
            *out++ = ((c >> 6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(0);
}
640
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb, without looking at
 * character boundaries.
 *
 * Returns 0 on success, or -1 if any argument is NULL or the number of
 *     bytes to copy is negative.
 * On success both *@outlen and *@inlenb are set to the number of bytes
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int len;

    if (out == NULL || inb == NULL)
        return(-1);
    if (outlen == NULL || inlenb == NULL)
        return(-1);

    /* copy no more than the smaller of the two buffers */
    len = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (len < 0)
        return(-1);

    memcpy(out, inb, len);

    *inlenb = len;
    *outlen = len;
    return(0);
}
677
Daniel Veillarde72c7562002-05-31 09:47:30 +0000678
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Conversion stops early, still returning 0, when @out is full or when
 * the input ends in the middle of a multi-byte sequence. A NULL @in is an
 * initialization call that produces and consumes nothing.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character above 0xFF)
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src, *srcEnd, *done;
    unsigned char *dst, *dstEnd;
    int status = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    done = in;                  /* end of the last fully converted char */
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int lead = *src++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* trailing byte in leading position, or lead of > 4 bytes */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* sequence cut by the end of input: wait for the rest */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* assertion: value is a single UTF-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = value;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(status);
}
765}
766
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @inb. A trailing odd byte in the input is ignored.
 *
 * Conversion stops early, still returning 0, when the next character
 * would not fit entirely in @out or when the input ends between the two
 * halves of a surrogate pair; no partial character is ever written.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 *     is not followed by a low surrogate)
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* processed = inb;
    unsigned int c, d;
    int needed;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split across buffers: wait for the second half */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UTF-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
854
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written one byte at a time, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @outb.
 *
 * A NULL @in is an initialization call: the Byte Order Mark FF FE is
 * written if @outb has room for it.
 *
 * Conversion stops early, still returning 0, when the next character
 * would not fit in @outb or when the input ends in the middle of a
 * multi-byte sequence.
 *
 * Returns 2 when the Byte Order Mark was written, 0 on success otherwise,
 *     or -2 if the transcoding failed (malformed UTF-8 or a value above
 *     0x10FFFF)
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80)      { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;               /* no chance for this in UTF-16 */

        /* sequence cut by the end of input: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte inside a sequence is malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(0);

invalid:
    /* report only the characters fully converted before the failure */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
973
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @inb. A trailing odd byte in the input is ignored.
 *
 * Conversion stops early, still returning 0, when the next character
 * would not fit entirely in @out or when the input ends between the two
 * halves of a surrogate pair; no partial character is ever written.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 *     is not followed by a low surrogate)
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* processed = inb;
    unsigned int c, d;
    int needed;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split across buffers: wait for the second half */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UTF-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
1065
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written one byte at a time, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @outb.
 *
 * A NULL @in is an initialization call: the Byte Order Mark FE FF is
 * written if @outb has room for it.
 *
 * Conversion stops early, still returning 0, when the next character
 * would not fit in @outb or when the input ends in the middle of a
 * multi-byte sequence.
 *
 * Returns 2 when the Byte Order Mark was written, 0 on success otherwise,
 *     or -2 if the transcoding failed (malformed UTF-8 or a value above
 *     0x10FFFF)
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFE;
            outb[1] = 0xFF;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FEFF Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80)      { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;               /* no chance for this in UTF-16 */

        /* sequence cut by the end of input: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte inside a sequence is malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(0);

invalid:
    /* report, in bytes, only what was fully converted before the failure */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
1181
Daniel Veillard97ac1312001-05-30 19:14:17 +00001182/************************************************************************
1183 * *
1184 * Generic encoding handling routines *
1185 * *
1186 ************************************************************************/
1187
Owen Taylor3473f882001-02-23 17:55:21 +00001188/**
1189 * xmlDetectCharEncoding:
1190 * @in: a pointer to the first bytes of the XML entity, must be at least
1191 * 4 bytes long.
1192 * @len: pointer to the length of the buffer
1193 *
1194 * Guess the encoding of the entity using the first bytes of the entity content
1195 * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
1196 *
1197 * Returns one of the XML_CHAR_ENCODING_... values.
1198 */
1199xmlCharEncoding
1200xmlDetectCharEncoding(const unsigned char* in, int len)
1201{
1202 if (len >= 4) {
1203 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1204 (in[2] == 0x00) && (in[3] == 0x3C))
1205 return(XML_CHAR_ENCODING_UCS4BE);
1206 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
1207 (in[2] == 0x00) && (in[3] == 0x00))
1208 return(XML_CHAR_ENCODING_UCS4LE);
1209 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1210 (in[2] == 0x3C) && (in[3] == 0x00))
1211 return(XML_CHAR_ENCODING_UCS4_2143);
1212 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
1213 (in[2] == 0x00) && (in[3] == 0x00))
1214 return(XML_CHAR_ENCODING_UCS4_3412);
1215 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
1216 (in[2] == 0xA7) && (in[3] == 0x94))
1217 return(XML_CHAR_ENCODING_EBCDIC);
1218 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
1219 (in[2] == 0x78) && (in[3] == 0x6D))
1220 return(XML_CHAR_ENCODING_UTF8);
1221 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001222 if (len >= 3) {
1223 /*
1224 * Errata on XML-1.0 June 20 2001
1225 * We now allow an UTF8 encoded BOM
1226 */
1227 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1228 (in[2] == 0xBF))
1229 return(XML_CHAR_ENCODING_UTF8);
1230 }
Owen Taylor3473f882001-02-23 17:55:21 +00001231 if (len >= 2) {
1232 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1233 return(XML_CHAR_ENCODING_UTF16BE);
1234 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1235 return(XML_CHAR_ENCODING_UTF16LE);
1236 }
1237 return(XML_CHAR_ENCODING_NONE);
1238}
1239
1240/**
1241 * xmlCleanupEncodingAliases:
1242 *
1243 * Unregisters all aliases
1244 */
1245void
1246xmlCleanupEncodingAliases(void) {
1247 int i;
1248
1249 if (xmlCharEncodingAliases == NULL)
1250 return;
1251
1252 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1253 if (xmlCharEncodingAliases[i].name != NULL)
1254 xmlFree((char *) xmlCharEncodingAliases[i].name);
1255 if (xmlCharEncodingAliases[i].alias != NULL)
1256 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1257 }
1258 xmlCharEncodingAliasesNb = 0;
1259 xmlCharEncodingAliasesMax = 0;
1260 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +00001261 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001262}
1263
1264/**
1265 * xmlGetEncodingAlias:
1266 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1267 *
1268 * Lookup an encoding name for the given alias.
1269 *
1270 * Returns NULL if not found the original name otherwise
1271 */
1272const char *
1273xmlGetEncodingAlias(const char *alias) {
1274 int i;
1275 char upper[100];
1276
1277 if (alias == NULL)
1278 return(NULL);
1279
1280 if (xmlCharEncodingAliases == NULL)
1281 return(NULL);
1282
1283 for (i = 0;i < 99;i++) {
1284 upper[i] = toupper(alias[i]);
1285 if (upper[i] == 0) break;
1286 }
1287 upper[i] = 0;
1288
1289 /*
1290 * Walk down the list looking for a definition of the alias
1291 */
1292 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1293 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1294 return(xmlCharEncodingAliases[i].name);
1295 }
1296 }
1297 return(NULL);
1298}
1299
1300/**
1301 * xmlAddEncodingAlias:
1302 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1303 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1304 *
1305 * Registers and alias @alias for an encoding named @name. Existing alias
1306 * will be overwritten.
1307 *
1308 * Returns 0 in case of success, -1 in case of error
1309 */
1310int
1311xmlAddEncodingAlias(const char *name, const char *alias) {
1312 int i;
1313 char upper[100];
1314
1315 if ((name == NULL) || (alias == NULL))
1316 return(-1);
1317
1318 for (i = 0;i < 99;i++) {
1319 upper[i] = toupper(alias[i]);
1320 if (upper[i] == 0) break;
1321 }
1322 upper[i] = 0;
1323
1324 if (xmlCharEncodingAliases == NULL) {
1325 xmlCharEncodingAliasesNb = 0;
1326 xmlCharEncodingAliasesMax = 20;
1327 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1328 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1329 if (xmlCharEncodingAliases == NULL)
1330 return(-1);
1331 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1332 xmlCharEncodingAliasesMax *= 2;
1333 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1334 xmlRealloc(xmlCharEncodingAliases,
1335 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1336 }
1337 /*
1338 * Walk down the list looking for a definition of the alias
1339 */
1340 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1341 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1342 /*
1343 * Replace the definition.
1344 */
1345 xmlFree((char *) xmlCharEncodingAliases[i].name);
1346 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1347 return(0);
1348 }
1349 }
1350 /*
1351 * Add the definition
1352 */
1353 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1354 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1355 xmlCharEncodingAliasesNb++;
1356 return(0);
1357}
1358
1359/**
1360 * xmlDelEncodingAlias:
1361 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1362 *
1363 * Unregisters an encoding alias @alias
1364 *
1365 * Returns 0 in case of success, -1 in case of error
1366 */
1367int
1368xmlDelEncodingAlias(const char *alias) {
1369 int i;
1370
1371 if (alias == NULL)
1372 return(-1);
1373
1374 if (xmlCharEncodingAliases == NULL)
1375 return(-1);
1376 /*
1377 * Walk down the list looking for a definition of the alias
1378 */
1379 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1380 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1381 xmlFree((char *) xmlCharEncodingAliases[i].name);
1382 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1383 xmlCharEncodingAliasesNb--;
1384 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1385 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1386 return(0);
1387 }
1388 }
1389 return(-1);
1390}
1391
1392/**
1393 * xmlParseCharEncoding:
1394 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1395 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001396 * Compare the string to the known encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001397 * that the comparison is case insensitive accordingly to the section
1398 * [XML] 4.3.3 Character Encoding in Entities.
1399 *
1400 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1401 * if not recognized.
1402 */
1403xmlCharEncoding
1404xmlParseCharEncoding(const char* name)
1405{
1406 const char *alias;
1407 char upper[500];
1408 int i;
1409
1410 if (name == NULL)
1411 return(XML_CHAR_ENCODING_NONE);
1412
1413 /*
1414 * Do the alias resolution
1415 */
1416 alias = xmlGetEncodingAlias(name);
1417 if (alias != NULL)
1418 name = alias;
1419
1420 for (i = 0;i < 499;i++) {
1421 upper[i] = toupper(name[i]);
1422 if (upper[i] == 0) break;
1423 }
1424 upper[i] = 0;
1425
1426 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1427 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1428 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1429
1430 /*
1431 * NOTE: if we were able to parse this, the endianness of UTF16 is
1432 * already found and in use
1433 */
1434 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1435 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1436
1437 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1438 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1439 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1440
1441 /*
1442 * NOTE: if we were able to parse this, the endianness of UCS4 is
1443 * already found and in use
1444 */
1445 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1446 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1447 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1448
1449
1450 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1451 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1452 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1453
1454 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1455 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1456 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1457
1458 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1459 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1460 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1461 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1462 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1463 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1464 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1465
1466 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1467 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1468 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1469
1470#ifdef DEBUG_ENCODING
1471 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1472#endif
1473 return(XML_CHAR_ENCODING_ERROR);
1474}
1475
1476/**
1477 * xmlGetCharEncodingName:
1478 * @enc: the encoding
1479 *
1480 * The "canonical" name for XML encoding.
1481 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1482 * Section 4.3.3 Character Encoding in Entities
1483 *
1484 * Returns the canonical name for the given encoding
1485 */
1486
1487const char*
1488xmlGetCharEncodingName(xmlCharEncoding enc) {
1489 switch (enc) {
1490 case XML_CHAR_ENCODING_ERROR:
1491 return(NULL);
1492 case XML_CHAR_ENCODING_NONE:
1493 return(NULL);
1494 case XML_CHAR_ENCODING_UTF8:
1495 return("UTF-8");
1496 case XML_CHAR_ENCODING_UTF16LE:
1497 return("UTF-16");
1498 case XML_CHAR_ENCODING_UTF16BE:
1499 return("UTF-16");
1500 case XML_CHAR_ENCODING_EBCDIC:
1501 return("EBCDIC");
1502 case XML_CHAR_ENCODING_UCS4LE:
1503 return("ISO-10646-UCS-4");
1504 case XML_CHAR_ENCODING_UCS4BE:
1505 return("ISO-10646-UCS-4");
1506 case XML_CHAR_ENCODING_UCS4_2143:
1507 return("ISO-10646-UCS-4");
1508 case XML_CHAR_ENCODING_UCS4_3412:
1509 return("ISO-10646-UCS-4");
1510 case XML_CHAR_ENCODING_UCS2:
1511 return("ISO-10646-UCS-2");
1512 case XML_CHAR_ENCODING_8859_1:
1513 return("ISO-8859-1");
1514 case XML_CHAR_ENCODING_8859_2:
1515 return("ISO-8859-2");
1516 case XML_CHAR_ENCODING_8859_3:
1517 return("ISO-8859-3");
1518 case XML_CHAR_ENCODING_8859_4:
1519 return("ISO-8859-4");
1520 case XML_CHAR_ENCODING_8859_5:
1521 return("ISO-8859-5");
1522 case XML_CHAR_ENCODING_8859_6:
1523 return("ISO-8859-6");
1524 case XML_CHAR_ENCODING_8859_7:
1525 return("ISO-8859-7");
1526 case XML_CHAR_ENCODING_8859_8:
1527 return("ISO-8859-8");
1528 case XML_CHAR_ENCODING_8859_9:
1529 return("ISO-8859-9");
1530 case XML_CHAR_ENCODING_2022_JP:
1531 return("ISO-2022-JP");
1532 case XML_CHAR_ENCODING_SHIFT_JIS:
1533 return("Shift-JIS");
1534 case XML_CHAR_ENCODING_EUC_JP:
1535 return("EUC-JP");
1536 case XML_CHAR_ENCODING_ASCII:
1537 return(NULL);
1538 }
1539 return(NULL);
1540}
1541
Daniel Veillard97ac1312001-05-30 19:14:17 +00001542/************************************************************************
1543 * *
1544 * Char encoding handlers *
1545 * *
1546 ************************************************************************/
1547
Owen Taylor3473f882001-02-23 17:55:21 +00001548
/*
 * Fixed capacity of the handler table: it is the number of slots
 * allocated by xmlInitCharEncodingHandlers() and the limit enforced by
 * xmlRegisterCharEncodingHandler().
 */
1549/* the size should be growable, but it's not a big deal ... */
1550#define MAX_ENCODING_HANDLERS 50
/* table of registered handlers, NULL until the handlers are initialized */
1551static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of slots of the table currently in use */
1552static int nbCharEncodingHandler = 0;
1553
1554/*
1555 * The default is UTF-8 for XML, that's also the default used for the
1556 * parser internals, so the default encoding handler is NULL
1557 */
1558
/* returned by xmlFindCharEncodingHandler() for a NULL or empty name */
1559static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1560
1561/**
1562 * xmlNewCharEncodingHandler:
1563 * @name: the encoding name, in UTF-8 format (ASCII actually)
1564 * @input: the xmlCharEncodingInputFunc to read that encoding
1565 * @output: the xmlCharEncodingOutputFunc to write that encoding
1566 *
1567 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001568 *
Owen Taylor3473f882001-02-23 17:55:21 +00001569 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1570 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001571xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001572xmlNewCharEncodingHandler(const char *name,
1573 xmlCharEncodingInputFunc input,
1574 xmlCharEncodingOutputFunc output) {
1575 xmlCharEncodingHandlerPtr handler;
1576 const char *alias;
1577 char upper[500];
1578 int i;
1579 char *up = 0;
1580
1581 /*
1582 * Do the alias resolution
1583 */
1584 alias = xmlGetEncodingAlias(name);
1585 if (alias != NULL)
1586 name = alias;
1587
1588 /*
1589 * Keep only the uppercase version of the encoding.
1590 */
1591 if (name == NULL) {
1592 xmlGenericError(xmlGenericErrorContext,
1593 "xmlNewCharEncodingHandler : no name !\n");
1594 return(NULL);
1595 }
1596 for (i = 0;i < 499;i++) {
1597 upper[i] = toupper(name[i]);
1598 if (upper[i] == 0) break;
1599 }
1600 upper[i] = 0;
1601 up = xmlMemStrdup(upper);
1602 if (up == NULL) {
1603 xmlGenericError(xmlGenericErrorContext,
1604 "xmlNewCharEncodingHandler : out of memory !\n");
1605 return(NULL);
1606 }
1607
1608 /*
1609 * allocate and fill-up an handler block.
1610 */
1611 handler = (xmlCharEncodingHandlerPtr)
1612 xmlMalloc(sizeof(xmlCharEncodingHandler));
1613 if (handler == NULL) {
1614 xmlGenericError(xmlGenericErrorContext,
1615 "xmlNewCharEncodingHandler : out of memory !\n");
1616 return(NULL);
1617 }
1618 handler->input = input;
1619 handler->output = output;
1620 handler->name = up;
1621
1622#ifdef LIBXML_ICONV_ENABLED
1623 handler->iconv_in = NULL;
1624 handler->iconv_out = NULL;
1625#endif /* LIBXML_ICONV_ENABLED */
1626
1627 /*
1628 * registers and returns the handler.
1629 */
1630 xmlRegisterCharEncodingHandler(handler);
1631#ifdef DEBUG_ENCODING
1632 xmlGenericError(xmlGenericErrorContext,
1633 "Registered encoding handler for %s\n", name);
1634#endif
1635 return(handler);
1636}
1637
1638/**
1639 * xmlInitCharEncodingHandlers:
1640 *
1641 * Initialize the char encoding support, it registers the default
1642 * encoding supported.
1643 * NOTE: while public, this function usually doesn't need to be called
1644 * in normal processing.
1645 */
1646void
1647xmlInitCharEncodingHandlers(void) {
1648 unsigned short int tst = 0x1234;
1649 unsigned char *ptr = (unsigned char *) &tst;
1650
1651 if (handlers != NULL) return;
1652
1653 handlers = (xmlCharEncodingHandlerPtr *)
1654 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1655
1656 if (*ptr == 0x12) xmlLittleEndian = 0;
1657 else if (*ptr == 0x34) xmlLittleEndian = 1;
1658 else xmlGenericError(xmlGenericErrorContext,
1659 "Odd problem at endianness detection\n");
1660
1661 if (handlers == NULL) {
1662 xmlGenericError(xmlGenericErrorContext,
1663 "xmlInitCharEncodingHandlers : out of memory !\n");
1664 return;
1665 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001666 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Owen Taylor3473f882001-02-23 17:55:21 +00001667 xmlUTF16LEHandler =
1668 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1669 xmlUTF16BEHandler =
1670 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1671 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1672 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001673 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001674#ifdef LIBXML_HTML_ENABLED
1675 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1676#endif
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001677#ifndef LIBXML_ICONV_ENABLED
1678#ifdef LIBXML_ISO8859X_ENABLED
1679 xmlRegisterCharEncodingHandlersISO8859x ();
1680#endif
1681#endif
1682
Owen Taylor3473f882001-02-23 17:55:21 +00001683}
1684
1685/**
1686 * xmlCleanupCharEncodingHandlers:
1687 *
1688 * Cleanup the memory allocated for the char encoding support, it
1689 * unregisters all the encoding handlers and the aliases.
1690 */
1691void
1692xmlCleanupCharEncodingHandlers(void) {
1693 xmlCleanupEncodingAliases();
1694
1695 if (handlers == NULL) return;
1696
1697 for (;nbCharEncodingHandler > 0;) {
1698 nbCharEncodingHandler--;
1699 if (handlers[nbCharEncodingHandler] != NULL) {
1700 if (handlers[nbCharEncodingHandler]->name != NULL)
1701 xmlFree(handlers[nbCharEncodingHandler]->name);
1702 xmlFree(handlers[nbCharEncodingHandler]);
1703 }
1704 }
1705 xmlFree(handlers);
1706 handlers = NULL;
1707 nbCharEncodingHandler = 0;
1708 xmlDefaultCharEncodingHandler = NULL;
1709}
1710
1711/**
1712 * xmlRegisterCharEncodingHandler:
1713 * @handler: the xmlCharEncodingHandlerPtr handler block
1714 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001715 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001716 */
1717void
1718xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1719 if (handlers == NULL) xmlInitCharEncodingHandlers();
1720 if (handler == NULL) {
1721 xmlGenericError(xmlGenericErrorContext,
1722 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1723 return;
1724 }
1725
1726 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1727 xmlGenericError(xmlGenericErrorContext,
1728 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1729 xmlGenericError(xmlGenericErrorContext,
1730 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1731 return;
1732 }
1733 handlers[nbCharEncodingHandler++] = handler;
1734}
1735
1736/**
1737 * xmlGetCharEncodingHandler:
1738 * @enc: an xmlCharEncoding value.
1739 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001740 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001741 *
1742 * Returns the handler or NULL if not found
1743 */
1744xmlCharEncodingHandlerPtr
1745xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1746 xmlCharEncodingHandlerPtr handler;
1747
1748 if (handlers == NULL) xmlInitCharEncodingHandlers();
1749 switch (enc) {
1750 case XML_CHAR_ENCODING_ERROR:
1751 return(NULL);
1752 case XML_CHAR_ENCODING_NONE:
1753 return(NULL);
1754 case XML_CHAR_ENCODING_UTF8:
1755 return(NULL);
1756 case XML_CHAR_ENCODING_UTF16LE:
1757 return(xmlUTF16LEHandler);
1758 case XML_CHAR_ENCODING_UTF16BE:
1759 return(xmlUTF16BEHandler);
1760 case XML_CHAR_ENCODING_EBCDIC:
1761 handler = xmlFindCharEncodingHandler("EBCDIC");
1762 if (handler != NULL) return(handler);
1763 handler = xmlFindCharEncodingHandler("ebcdic");
1764 if (handler != NULL) return(handler);
1765 break;
1766 case XML_CHAR_ENCODING_UCS4BE:
1767 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1768 if (handler != NULL) return(handler);
1769 handler = xmlFindCharEncodingHandler("UCS-4");
1770 if (handler != NULL) return(handler);
1771 handler = xmlFindCharEncodingHandler("UCS4");
1772 if (handler != NULL) return(handler);
1773 break;
1774 case XML_CHAR_ENCODING_UCS4LE:
1775 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1776 if (handler != NULL) return(handler);
1777 handler = xmlFindCharEncodingHandler("UCS-4");
1778 if (handler != NULL) return(handler);
1779 handler = xmlFindCharEncodingHandler("UCS4");
1780 if (handler != NULL) return(handler);
1781 break;
1782 case XML_CHAR_ENCODING_UCS4_2143:
1783 break;
1784 case XML_CHAR_ENCODING_UCS4_3412:
1785 break;
1786 case XML_CHAR_ENCODING_UCS2:
1787 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1788 if (handler != NULL) return(handler);
1789 handler = xmlFindCharEncodingHandler("UCS-2");
1790 if (handler != NULL) return(handler);
1791 handler = xmlFindCharEncodingHandler("UCS2");
1792 if (handler != NULL) return(handler);
1793 break;
1794
1795 /*
1796 * We used to keep ISO Latin encodings native in the
1797 * generated data. This led to so many problems that
1798 * this has been removed. One can still change this
1799 * back by registering no-ops encoders for those
1800 */
1801 case XML_CHAR_ENCODING_8859_1:
1802 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1803 if (handler != NULL) return(handler);
1804 break;
1805 case XML_CHAR_ENCODING_8859_2:
1806 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1807 if (handler != NULL) return(handler);
1808 break;
1809 case XML_CHAR_ENCODING_8859_3:
1810 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1811 if (handler != NULL) return(handler);
1812 break;
1813 case XML_CHAR_ENCODING_8859_4:
1814 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1815 if (handler != NULL) return(handler);
1816 break;
1817 case XML_CHAR_ENCODING_8859_5:
1818 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1819 if (handler != NULL) return(handler);
1820 break;
1821 case XML_CHAR_ENCODING_8859_6:
1822 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1823 if (handler != NULL) return(handler);
1824 break;
1825 case XML_CHAR_ENCODING_8859_7:
1826 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1827 if (handler != NULL) return(handler);
1828 break;
1829 case XML_CHAR_ENCODING_8859_8:
1830 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1831 if (handler != NULL) return(handler);
1832 break;
1833 case XML_CHAR_ENCODING_8859_9:
1834 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1835 if (handler != NULL) return(handler);
1836 break;
1837
1838
1839 case XML_CHAR_ENCODING_2022_JP:
1840 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1841 if (handler != NULL) return(handler);
1842 break;
1843 case XML_CHAR_ENCODING_SHIFT_JIS:
1844 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1845 if (handler != NULL) return(handler);
1846 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1847 if (handler != NULL) return(handler);
1848 handler = xmlFindCharEncodingHandler("Shift_JIS");
1849 if (handler != NULL) return(handler);
1850 break;
1851 case XML_CHAR_ENCODING_EUC_JP:
1852 handler = xmlFindCharEncodingHandler("EUC-JP");
1853 if (handler != NULL) return(handler);
1854 break;
1855 default:
1856 break;
1857 }
1858
1859#ifdef DEBUG_ENCODING
1860 xmlGenericError(xmlGenericErrorContext,
1861 "No handler found for encoding %d\n", enc);
1862#endif
1863 return(NULL);
1864}
1865
1866/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001867 * xmlFindCharEncodingHandler:
1868 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001869 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001870 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001871 *
1872 * Returns the handler or NULL if not found
1873 */
1874xmlCharEncodingHandlerPtr
1875xmlFindCharEncodingHandler(const char *name) {
1876 const char *nalias;
1877 const char *norig;
1878 xmlCharEncoding alias;
1879#ifdef LIBXML_ICONV_ENABLED
1880 xmlCharEncodingHandlerPtr enc;
1881 iconv_t icv_in, icv_out;
1882#endif /* LIBXML_ICONV_ENABLED */
1883 char upper[100];
1884 int i;
1885
1886 if (handlers == NULL) xmlInitCharEncodingHandlers();
1887 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1888 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1889
1890 /*
1891 * Do the alias resolution
1892 */
1893 norig = name;
1894 nalias = xmlGetEncodingAlias(name);
1895 if (nalias != NULL)
1896 name = nalias;
1897
1898 /*
1899 * Check first for directly registered encoding names
1900 */
1901 for (i = 0;i < 99;i++) {
1902 upper[i] = toupper(name[i]);
1903 if (upper[i] == 0) break;
1904 }
1905 upper[i] = 0;
1906
1907 for (i = 0;i < nbCharEncodingHandler; i++)
1908 if (!strcmp(upper, handlers[i]->name)) {
1909#ifdef DEBUG_ENCODING
1910 xmlGenericError(xmlGenericErrorContext,
1911 "Found registered handler for encoding %s\n", name);
1912#endif
1913 return(handlers[i]);
1914 }
1915
1916#ifdef LIBXML_ICONV_ENABLED
1917 /* check whether iconv can handle this */
1918 icv_in = iconv_open("UTF-8", name);
1919 icv_out = iconv_open(name, "UTF-8");
1920 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1921 enc = (xmlCharEncodingHandlerPtr)
1922 xmlMalloc(sizeof(xmlCharEncodingHandler));
1923 if (enc == NULL) {
1924 iconv_close(icv_in);
1925 iconv_close(icv_out);
1926 return(NULL);
1927 }
1928 enc->name = xmlMemStrdup(name);
1929 enc->input = NULL;
1930 enc->output = NULL;
1931 enc->iconv_in = icv_in;
1932 enc->iconv_out = icv_out;
1933#ifdef DEBUG_ENCODING
1934 xmlGenericError(xmlGenericErrorContext,
1935 "Found iconv handler for encoding %s\n", name);
1936#endif
1937 return enc;
1938 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1939 xmlGenericError(xmlGenericErrorContext,
1940 "iconv : problems with filters for '%s'\n", name);
1941 }
1942#endif /* LIBXML_ICONV_ENABLED */
1943
1944#ifdef DEBUG_ENCODING
1945 xmlGenericError(xmlGenericErrorContext,
1946 "No handler found for encoding %s\n", name);
1947#endif
1948
1949 /*
1950 * Fallback using the canonical names
1951 */
1952 alias = xmlParseCharEncoding(norig);
1953 if (alias != XML_CHAR_ENCODING_ERROR) {
1954 const char* canon;
1955 canon = xmlGetCharEncodingName(alias);
1956 if ((canon != NULL) && (strcmp(name, canon))) {
1957 return(xmlFindCharEncodingHandler(canon));
1958 }
1959 }
1960
1961 return(NULL);
1962}
1963
Daniel Veillard97ac1312001-05-30 19:14:17 +00001964/************************************************************************
1965 * *
1966 * ICONV based generic conversion functions *
1967 * *
1968 ************************************************************************/
1969
Owen Taylor3473f882001-02-23 17:55:21 +00001970#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd,
 *       or NULL
 * @inlen:  the length of @in
 *
 * Runs one iconv() call on the buffers and maps its outcome to the
 * return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or cannot be represented in the target encoding), or
 *     -3 if the last bytes can't form a single output char (or for any
 *        other iconv failure).
 *
 * The value of @inlen after return is the number of octets consumed
 * and the value of @outlen the number of octets produced; both are set
 * to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    size_t icv_inlen = *inlen, icv_outlen = *outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;    /* iconv() returns a size_t, (size_t) -1 on error */

    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    if (in != NULL) {
        /* iconv() leaves the remaining sizes: turn them into used sizes */
        *inlen -= icv_inlen;
        *outlen -= icv_outlen;
    } else {
        *inlen = 0;
        *outlen = 0;
    }
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return -1;
#endif
        /* EINVAL (incomplete input sequence) and anything else */
        return -3;
    }
    return 0;
}
2029#endif /* LIBXML_ICONV_ENABLED */
2030
Daniel Veillard97ac1312001-05-30 19:14:17 +00002031/************************************************************************
2032 * *
2033 * The real API used by libxml for on-the-fly conversion *
2034 * *
2035 ************************************************************************/
2036
Owen Taylor3473f882001-02-23 17:55:21 +00002037/**
2038 * xmlCharEncFirstLine:
2039 * @handler: char enconding transformation data structure
2040 * @out: an xmlBuffer for the output.
2041 * @in: an xmlBuffer for the input
2042 *
2043 * Front-end for the encoding handler input function, but handle only
2044 * the very first line, i.e. limit itself to 45 chars.
2045 *
2046 * Returns the number of byte written if success, or
2047 * -1 general error
2048 * -2 if the transcoding fails (for *in is not valid utf8 string or
2049 * the result of transformation can't fit into the encoding we want), or
2050 */
2051int
2052xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2053 xmlBufferPtr in) {
2054 int ret = -2;
2055 int written;
2056 int toconv;
2057
2058 if (handler == NULL) return(-1);
2059 if (out == NULL) return(-1);
2060 if (in == NULL) return(-1);
2061
2062 written = out->size - out->use;
2063 toconv = in->use;
2064 if (toconv * 2 >= written) {
2065 xmlBufferGrow(out, toconv);
2066 written = out->size - out->use - 1;
2067 }
2068
2069 /*
2070 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2071 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002072 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00002073 */
2074 written = 45;
2075
2076 if (handler->input != NULL) {
2077 ret = handler->input(&out->content[out->use], &written,
2078 in->content, &toconv);
2079 xmlBufferShrink(in, toconv);
2080 out->use += written;
2081 out->content[out->use] = 0;
2082 }
2083#ifdef LIBXML_ICONV_ENABLED
2084 else if (handler->iconv_in != NULL) {
2085 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2086 &written, in->content, &toconv);
2087 xmlBufferShrink(in, toconv);
2088 out->use += written;
2089 out->content[out->use] = 0;
2090 if (ret == -1) ret = -3;
2091 }
2092#endif /* LIBXML_ICONV_ENABLED */
2093#ifdef DEBUG_ENCODING
2094 switch (ret) {
2095 case 0:
2096 xmlGenericError(xmlGenericErrorContext,
2097 "converted %d bytes to %d bytes of input\n",
2098 toconv, written);
2099 break;
2100 case -1:
2101 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2102 toconv, written, in->use);
2103 break;
2104 case -2:
2105 xmlGenericError(xmlGenericErrorContext,
2106 "input conversion failed due to input error\n");
2107 break;
2108 case -3:
2109 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2110 toconv, written, in->use);
2111 break;
2112 default:
2113 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2114 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002115#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002116 /*
2117 * Ignore when input buffer is not on a boundary
2118 */
2119 if (ret == -3) ret = 0;
2120 if (ret == -1) ret = 0;
2121 return(ret);
2122}
2123
2124/**
2125 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002127 * @out: an xmlBuffer for the output.
2128 * @in: an xmlBuffer for the input
2129 *
2130 * Generic front-end for the encoding handler input function
2131 *
2132 * Returns the number of byte written if success, or
2133 * -1 general error
2134 * -2 if the transcoding fails (for *in is not valid utf8 string or
2135 * the result of transformation can't fit into the encoding we want), or
2136 */
2137int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002138xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2139 xmlBufferPtr in)
2140{
Owen Taylor3473f882001-02-23 17:55:21 +00002141 int ret = -2;
2142 int written;
2143 int toconv;
2144
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002145 if (handler == NULL)
2146 return (-1);
2147 if (out == NULL)
2148 return (-1);
2149 if (in == NULL)
2150 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002151
2152 toconv = in->use;
2153 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002154 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00002155 written = out->size - out->use;
2156 if (toconv * 2 >= written) {
2157 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002158 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002159 }
2160 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002161 ret = handler->input(&out->content[out->use], &written,
2162 in->content, &toconv);
2163 xmlBufferShrink(in, toconv);
2164 out->use += written;
2165 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002166 }
2167#ifdef LIBXML_ICONV_ENABLED
2168 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002169 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2170 &written, in->content, &toconv);
2171 xmlBufferShrink(in, toconv);
2172 out->use += written;
2173 out->content[out->use] = 0;
2174 if (ret == -1)
2175 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002176 }
2177#endif /* LIBXML_ICONV_ENABLED */
2178 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002179 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002180#ifdef DEBUG_ENCODING
2181 xmlGenericError(xmlGenericErrorContext,
2182 "converted %d bytes to %d bytes of input\n",
2183 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002184#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002185 break;
2186 case -1:
2187#ifdef DEBUG_ENCODING
2188 xmlGenericError(xmlGenericErrorContext,
2189 "converted %d bytes to %d bytes of input, %d left\n",
2190 toconv, written, in->use);
2191#endif
2192 break;
2193 case -3:
2194#ifdef DEBUG_ENCODING
2195 xmlGenericError(xmlGenericErrorContext,
2196 "converted %d bytes to %d bytes of input, %d left\n",
2197 toconv, written, in->use);
2198#endif
2199 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002200 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002201 xmlGenericError(xmlGenericErrorContext,
2202 "input conversion failed due to input error\n");
2203 xmlGenericError(xmlGenericErrorContext,
2204 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2205 in->content[0], in->content[1],
2206 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00002207 }
2208 /*
2209 * Ignore when input buffer is not on a boundary
2210 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002211 if (ret == -3)
2212 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00002213 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00002214}
2215
2216/**
2217 * xmlCharEncOutFunc:
2218 * @handler: char enconding transformation data structure
2219 * @out: an xmlBuffer for the output.
2220 * @in: an xmlBuffer for the input
2221 *
2222 * Generic front-end for the encoding handler output function
2223 * a first call with @in == NULL has to be made firs to initiate the
2224 * output in case of non-stateless encoding needing to initiate their
2225 * state or the output (like the BOM in UTF16).
2226 * In case of UTF8 sequence conversion errors for the given encoder,
2227 * the content will be automatically remapped to a CharRef sequence.
2228 *
2229 * Returns the number of byte written if success, or
2230 * -1 general error
2231 * -2 if the transcoding fails (for *in is not valid utf8 string or
2232 * the result of transformation can't fit into the encoding we want), or
2233 */
2234int
2235xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2236 xmlBufferPtr in) {
2237 int ret = -2;
2238 int written;
2239 int writtentot = 0;
2240 int toconv;
2241 int output = 0;
2242
2243 if (handler == NULL) return(-1);
2244 if (out == NULL) return(-1);
2245
2246retry:
2247
2248 written = out->size - out->use;
2249
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002250 if (written > 0)
2251 written--; /* Gennady: count '/0' */
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 /*
2254 * First specific handling of in = NULL, i.e. the initialization call
2255 */
2256 if (in == NULL) {
2257 toconv = 0;
2258 if (handler->output != NULL) {
2259 ret = handler->output(&out->content[out->use], &written,
2260 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002261 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002262 out->use += written;
2263 out->content[out->use] = 0;
2264 }
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266#ifdef LIBXML_ICONV_ENABLED
2267 else if (handler->iconv_out != NULL) {
2268 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2269 &written, NULL, &toconv);
2270 out->use += written;
2271 out->content[out->use] = 0;
2272 }
2273#endif /* LIBXML_ICONV_ENABLED */
2274#ifdef DEBUG_ENCODING
2275 xmlGenericError(xmlGenericErrorContext,
2276 "initialized encoder\n");
2277#endif
2278 return(0);
2279 }
2280
2281 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002282 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002283 */
2284 toconv = in->use;
2285 if (toconv == 0)
2286 return(0);
2287 if (toconv * 2 >= written) {
2288 xmlBufferGrow(out, toconv * 2);
2289 written = out->size - out->use - 1;
2290 }
2291 if (handler->output != NULL) {
2292 ret = handler->output(&out->content[out->use], &written,
2293 in->content, &toconv);
2294 xmlBufferShrink(in, toconv);
2295 out->use += written;
2296 writtentot += written;
2297 out->content[out->use] = 0;
2298 }
2299#ifdef LIBXML_ICONV_ENABLED
2300 else if (handler->iconv_out != NULL) {
2301 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2302 &written, in->content, &toconv);
2303 xmlBufferShrink(in, toconv);
2304 out->use += written;
2305 writtentot += written;
2306 out->content[out->use] = 0;
2307 if (ret == -1) {
2308 if (written > 0) {
2309 /*
2310 * Can be a limitation of iconv
2311 */
2312 goto retry;
2313 }
2314 ret = -3;
2315 }
2316 }
2317#endif /* LIBXML_ICONV_ENABLED */
2318 else {
2319 xmlGenericError(xmlGenericErrorContext,
2320 "xmlCharEncOutFunc: no output function !\n");
2321 return(-1);
2322 }
2323
2324 if (ret >= 0) output += ret;
2325
2326 /*
2327 * Attempt to handle error cases
2328 */
2329 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002330 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002331#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002332 xmlGenericError(xmlGenericErrorContext,
2333 "converted %d bytes to %d bytes of output\n",
2334 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002335#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002336 break;
2337 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002338#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002339 xmlGenericError(xmlGenericErrorContext,
2340 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002341#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002342 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002343 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002344#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002345 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2346 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002347#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002348 break;
2349 case -2: {
2350 int len = in->use;
2351 const xmlChar *utf = (const xmlChar *) in->content;
2352 int cur;
2353
2354 cur = xmlGetUTF8Char(utf, &len);
2355 if (cur > 0) {
2356 xmlChar charref[20];
2357
2358#ifdef DEBUG_ENCODING
2359 xmlGenericError(xmlGenericErrorContext,
2360 "handling output conversion error\n");
2361 xmlGenericError(xmlGenericErrorContext,
2362 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2363 in->content[0], in->content[1],
2364 in->content[2], in->content[3]);
2365#endif
2366 /*
2367 * Removes the UTF8 sequence, and replace it by a charref
2368 * and continue the transcoding phase, hoping the error
2369 * did not mangle the encoder state.
2370 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002371 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002372 xmlBufferShrink(in, len);
2373 xmlBufferAddHead(in, charref, -1);
2374
2375 goto retry;
2376 } else {
2377 xmlGenericError(xmlGenericErrorContext,
2378 "output conversion failed due to conv error\n");
2379 xmlGenericError(xmlGenericErrorContext,
2380 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2381 in->content[0], in->content[1],
2382 in->content[2], in->content[3]);
2383 in->content[0] = ' ';
2384 }
2385 break;
2386 }
2387 }
2388 return(ret);
2389}
2390
2391/**
2392 * xmlCharEncCloseFunc:
2393 * @handler: char enconding transformation data structure
2394 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002395 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002396 *
2397 * Returns 0 if success, or -1 in case of error
2398 */
2399int
2400xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2401 int ret = 0;
2402 if (handler == NULL) return(-1);
2403 if (handler->name == NULL) return(-1);
2404#ifdef LIBXML_ICONV_ENABLED
2405 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002406 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002407 * and the associated icon resources.
2408 */
2409 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2410 if (handler->name != NULL)
2411 xmlFree(handler->name);
2412 handler->name = NULL;
2413 if (handler->iconv_out != NULL) {
2414 if (iconv_close(handler->iconv_out))
2415 ret = -1;
2416 handler->iconv_out = NULL;
2417 }
2418 if (handler->iconv_in != NULL) {
2419 if (iconv_close(handler->iconv_in))
2420 ret = -1;
2421 handler->iconv_in = NULL;
2422 }
2423 xmlFree(handler);
2424 }
2425#endif /* LIBXML_ICONV_ENABLED */
2426#ifdef DEBUG_ENCODING
2427 if (ret)
2428 xmlGenericError(xmlGenericErrorContext,
2429 "failed to close the encoding handler\n");
2430 else
2431 xmlGenericError(xmlGenericErrorContext,
2432 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002433#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002434
Owen Taylor3473f882001-02-23 17:55:21 +00002435 return(ret);
2436}
2437
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002438#ifndef LIBXML_ICONV_ENABLED
2439#ifdef LIBXML_ISO8859X_ENABLED
2440
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Bytes below 0x80 are copied as is. The table is
 * read as 48 index bytes followed by blocks of 64 bytes:
 *   - 2 bytes sequence: xlattable[lead & 0x1F] selects a block and the
 *     low 6 bits of the trailing byte select the result in that block;
 *   - 3 bytes sequence: xlattable[32 + (lead & 0x0F)] selects a block,
 *     the low 6 bits of the first trailing byte select the next block
 *     in it, the low 6 bits of the second one select the result.
 * A result of 0 means the character is not in the target set.
 * The conversion stops without error once @out is full.
 *
 * Returns 0 if success (including a stop because @out is full or an
 *     initialization call with @in == NULL), -2 if the transcoding fails
 *     (invalid or truncated UTF-8 sequence, character not in the set
 *     or above U+FFFF)
 * The value of @inlen after return is the number of octets consumed,
 *     on failure it is the offset of the sequence which was rejected.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* seq = in;   /* start of the sequence being decoded */

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    /* each sequence yields exactly one byte, so one free slot is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d;

        seq = in;
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return(0);

failed:
    /* report what was converted before the offending sequence */
    *outlen = (int) (out - outstart);
    *inlen = (int) (seq - instart);
    return(-2);
}
2555
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: the code points of the bytes 0x80..0xFF, indexed by
 *                (byte - 0x80), 0 meaning that the byte is undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * looked up in @unicodetable and serialized on 2 or 3 bytes.
 * The conversion stops without error as soon as the next character does
 * not fit in @out; a character is never written partially.
 *
 * Returns 0 if success, or -1 if an undefined byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    /* a byte is only read once it is known to be inside the input */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* anything else needs at least 2 bytes of output */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* 3 bytes form: make sure the last one fits too */
                if (outend - out < 3)
                    break;
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (0);
}
2615
2616
2617/************************************************************************
2618 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2619 ************************************************************************/
2620
/*
 * ISO-8859-2 to Unicode: one code point per byte value 0x80..0xFF, stored
 * at index (byte - 0x80), 8 entries per row. This is the layout
 * ISO8859xToUTF8 reads through its unicodetable argument (presumably this
 * table is handed to it by a wrapper outside this section).
 */
2621static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2622 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2623 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2624 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2625 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2626 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2627 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2628 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2629 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2630 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2631 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2632 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2633 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2634 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2635 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2636 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2637 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2638};
2639
/*
 * Unicode to ISO-8859-2, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument (presumably this table is handed to it
 * by a wrapper outside this section):
 *   - 48 index bytes: 32 slots picked by the low 5 bits of a 2 bytes
 *     sequence lead, then 16 slots picked by the low 4 bits of a 3 bytes
 *     sequence lead, each holding a block number;
 *   - 6 blocks of 64 bytes, addressed as 48 + block * 64 + (low 6 bits
 *     of a trailing byte), holding either the ISO-8859-2 byte or, for the
 *     first level of a 3 bytes sequence, the number of the next block.
 * A 0 entry means "not in the character set"; block 0 is all zeroes.
 */
2640static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2641 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2646 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2648 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2649 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2650 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2651 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2652 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2653 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2656 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2657 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2658 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2660 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2661 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2662 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2663 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2664 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2665 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2666 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2667 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2668};
2669
/*
 * ISO-8859-3 to Unicode: one code point per byte value 0x80..0xFF, stored
 * at index (byte - 0x80), 8 entries per row. This is the layout
 * ISO8859xToUTF8 reads through its unicodetable argument (presumably this
 * table is handed to it by a wrapper outside this section); the 0x0000
 * entries are the bytes that function rejects as undefined code points.
 */
2670static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2671 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2672 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2673 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2674 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2675 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2676 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2677 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2678 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2679 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2680 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2681 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2682 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2683 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2684 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2685 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2686 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2687};
2688
/*
 * Unicode to ISO-8859-3, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument (presumably this table is handed to it
 * by a wrapper outside this section):
 *   - 48 index bytes: 32 slots picked by the low 5 bits of a 2 bytes
 *     sequence lead, then 16 slots picked by the low 4 bits of a 3 bytes
 *     sequence lead, each holding a block number;
 *   - 7 blocks of 64 bytes, addressed as 48 + block * 64 + (low 6 bits
 *     of a trailing byte), holding either the ISO-8859-3 byte or, for the
 *     first level of a 3 bytes sequence, the number of the next block.
 * A 0 entry means "not in the character set".
 */
2689static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2690 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2698 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2699 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2700 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2701 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2703 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2709 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2717 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2718 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2719 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2720 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2721};
2722
/*
 * ISO-8859-4 to Unicode: one code point per byte value 0x80..0xFF, stored
 * at index (byte - 0x80), 8 entries per row. This is the layout
 * ISO8859xToUTF8 reads through its unicodetable argument (presumably this
 * table is handed to it by a wrapper outside this section).
 */
2723static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2724 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2725 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2726 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2727 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2728 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2729 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2730 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2731 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2732 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2733 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2734 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2735 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2736 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2737 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2738 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2739 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2740};
2741
/*
 * Unicode to ISO-8859-4, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument (presumably this table is handed to it
 * by a wrapper outside this section):
 *   - 48 index bytes: 32 slots picked by the low 5 bits of a 2 bytes
 *     sequence lead, then 16 slots picked by the low 4 bits of a 3 bytes
 *     sequence lead, each holding a block number;
 *   - 6 blocks of 64 bytes, addressed as 48 + block * 64 + (low 6 bits
 *     of a trailing byte), holding either the ISO-8859-4 byte or, for the
 *     first level of a 3 bytes sequence, the number of the next block.
 * A 0 entry means "not in the character set"; block 0 is all zeroes.
 */
2742static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2743 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2744 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2750 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2751 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2752 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2753 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2754 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2755 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2756 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2757 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2759 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2761 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2762 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2763 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2764 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2765 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2766 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2767 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2768 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2769 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2770};
2771
/*
 * ISO-8859-5 to Unicode: one code point per byte value 0x80..0xFF, stored
 * at index (byte - 0x80), 8 entries per row. This is the layout
 * ISO8859xToUTF8 reads through its unicodetable argument (presumably this
 * table is handed to it by a wrapper outside this section). The entry
 * 0x2116 is the only one needing the 3 bytes UTF-8 form.
 */
2772static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2773 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2774 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2775 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2776 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2777 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2778 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2779 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2780 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2781 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2782 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2783 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2784 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2785 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2786 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2787 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2788 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2789};
2790
/*
 * Unicode to ISO-8859-5, in the 2-level layout UTF8ToISO8859x reads
 * through its xlattable argument (presumably this table is handed to it
 * by a wrapper outside this section):
 *   - 48 index bytes: 32 slots picked by the low 5 bits of a 2 bytes
 *     sequence lead, then 16 slots picked by the low 4 bits of a 3 bytes
 *     sequence lead, each holding a block number;
 *   - 6 blocks of 64 bytes, addressed as 48 + block * 64 + (low 6 bits
 *     of a trailing byte), holding either the ISO-8859-5 byte or, for the
 *     first level of a 3 bytes sequence, the number of the next block.
 * A 0 entry means "not in the character set"; block 0 is all zeroes.
 */
2791static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2792 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2793 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2794 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2797 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2800 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2801 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2804 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2805 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2806 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2807 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2808 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2814 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2816 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2817 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2818 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2819};
2820
/*
 * ISO-8859-6 to Unicode: one code point per byte value 0x80..0xFF, stored
 * at index (byte - 0x80), 8 entries per row. This is the layout
 * ISO8859xToUTF8 reads through its unicodetable argument (presumably this
 * table is handed to it by a wrapper outside this section); the 0x0000
 * entries are the bytes that function rejects as undefined code points.
 */
2821static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2822 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2823 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2824 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2825 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2826 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2827 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2828 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2829 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2830 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2831 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2832 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2833 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2834 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2835 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2836 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2837 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2838};
2839
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte table of 48 + 5 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-6) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
2840static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2841 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2842 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2843 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2844 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2848 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2849 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2850 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2858 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2859 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2860 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2861 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864};
2865
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128-entry table of 16-bit values for ISO-8859-7. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point.
 * NOTE(review): the 0x0000 entries presumably mark byte values with no
 * assigned character; the routine that reads this table is outside this
 * excerpt, so confirm against it.
 */
2866static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2867 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2868 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2869 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2870 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2871 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2872 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2873 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2874 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2875 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2876 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2877 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2878 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2879 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2880 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2881 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2882 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2883};
2884
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table of 48 + 7 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-7) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
2885static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2886 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2894 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2895 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2896 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2897 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2900 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2901 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2902 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2910 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2911 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2912 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2913 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917};
2918
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128-entry table of 16-bit values for ISO-8859-8. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point.
 * NOTE(review): the 0x0000 entries presumably mark byte values with no
 * assigned character; the routine that reads this table is outside this
 * excerpt, so confirm against it.
 */
2919static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2920 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2921 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2922 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2923 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2924 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2925 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2926 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2927 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2928 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2929 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2930 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2931 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2932 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2933 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2934 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2935 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2936};
2937
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table of 48 + 7 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-8) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
2938static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2939 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2945 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2946 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2947 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2948 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2949 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2950 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2963 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2964 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2968 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2970};
2971
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128-entry table of 16-bit values for ISO-8859-9. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point. Every entry in
 * this table is non-zero.
 * NOTE(review): the routine that reads this table is outside this
 * excerpt, so confirm the indexing convention against it.
 */
2972static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2973 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2974 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2975 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2976 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2977 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2978 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2979 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2980 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2981 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2982 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2983 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2984 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2985 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2986 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2987 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2988 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2989};
2990
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table of 48 + 5 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-9) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
2991static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2992 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3000 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3001 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3002 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3003 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3004 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3005 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3006 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015};
3016
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128-entry table of 16-bit values for ISO-8859-10. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point. Every entry in
 * this table is non-zero.
 * NOTE(review): the routine that reads this table is outside this
 * excerpt, so confirm the indexing convention against it.
 */
3017static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3018 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3019 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3020 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3021 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3022 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3023 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3024 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3025 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3026 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3027 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3028 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3029 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3030 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3031 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3032 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3033 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3034};
3035
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table of 48 + 7 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-10) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
3036static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3037 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3038 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3039 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3040 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3041 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3045 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3046 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3047 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3049 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3050 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3051 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3052 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3053 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3054 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3055 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3056 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3065 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3066 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3067 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3068};
3069
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128-entry table of 16-bit values for ISO-8859-11. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point.
 * NOTE(review): the 0x0000 entries presumably mark byte values with no
 * assigned character; the routine that reads this table is outside this
 * excerpt, so confirm against it.
 */
3070static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3071 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3072 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3073 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3074 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3075 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3076 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3077 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3078 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3079 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3080 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3081 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3082 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3083 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3084 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3085 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3086 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3087};
3088
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table of 48 + 6 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-11) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
3089static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3090 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3098 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3099 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3105 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3106 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3107 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3108 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3109 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3114 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3115 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117};
3118
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry table of 16-bit values for ISO-8859-13. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point. Every entry in
 * this table is non-zero.
 * NOTE(review): the routine that reads this table is outside this
 * excerpt, so confirm the indexing convention against it.
 */
3119static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3120 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3121 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3122 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3123 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3124 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3125 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3126 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3127 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3128 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3129 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3130 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3131 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3132 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3133 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3134 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3135 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3136};
3137
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table of 48 + 7 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-13) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
3138static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3139 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3147 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3148 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3149 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3150 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3153 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3159 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3160 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3161 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3162 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3164 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3165 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3166 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3167 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3169 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3170};
3171
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values for ISO-8859-14. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point. Every entry in
 * this table is non-zero.
 * NOTE(review): the routine that reads this table is outside this
 * excerpt, so confirm the indexing convention against it.
 */
3172static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3173 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3174 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3175 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3176 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3177 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3178 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3179 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3180 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3181 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3182 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3183 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3184 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3185 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3186 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3187 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3188 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3189};
3190
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table of 48 + 10 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-14) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
3191static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3192 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3200 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3201 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3202 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3207 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3208 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3219 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3227 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3232 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3233 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3234 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3235};
3236
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values for ISO-8859-15. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point. Every entry in
 * this table is non-zero.
 * NOTE(review): the routine that reads this table is outside this
 * excerpt, so confirm the indexing convention against it.
 */
3237static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3238 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3239 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3240 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3241 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3242 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3243 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3244 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3245 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3246 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3247 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3248 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3249 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3250 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3251 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3252 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3253 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3254};
3255
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table of 48 + 6 * 64 entries, initialised from 16-byte string
 * literals that fill the array exactly (no terminating NUL is stored).
 * NOTE(review): judging by the name and layout this is the reverse
 * (Unicode to ISO-8859-15) lookup, with a 48-byte index area followed by
 * 64-byte blocks of output bytes; the lookup code is not visible in this
 * excerpt, so verify before relying on that reading.
 */
3256static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3257 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3265 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3266 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3267 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3268 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3280 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3281 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3282 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3283 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3284};
3285
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values for ISO-8859-16. The first 32 entries
 * are 0x0080..0x009f in order, which is consistent with entry i describing
 * byte value 0x80 + i and holding its Unicode code point. Every entry in
 * this table is non-zero.
 * NOTE(review): the routine that reads this table is outside this
 * excerpt, so confirm the indexing convention against it.
 */
3286static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3287 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3288 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3289 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3290 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3291 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3292 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3293 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3294 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3295 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3296 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3297 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3298 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3299 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3300 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3301 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3302 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3303};
3304
/*
 * xmltranscodetable_ISO8859_16: 48 + 9 * 64 = 624 bytes that
 * UTF8ToISO8859_16() hands to UTF8ToISO8859x() as its table argument.
 * The initializer is 39 adjacent 16-byte string literals, which fills
 * the array exactly (no room is left for a terminating NUL).
 *
 * NOTE(review): the declared size suggests 48 leading index bytes
 * followed by nine 64-byte blocks, and the leading 48 bytes only hold
 * the small values 0x00..0x08; the actual lookup scheme is implemented
 * in UTF8ToISO8859x(), outside this part of the file -- confirm there
 * before editing any byte.
 */
3305static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
 /* first 48 bytes: values in 0x00..0x08 only (presumably block selectors) */
3306 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 /* remaining 9 * 64 bytes, four 16-byte rows per 64-byte group */
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3314 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3315 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3316 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3317 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3318 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3323 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3325 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3342 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3343 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3344 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3345};
3346
3347
3348/*
3349 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3350 */
3351
3352static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3353 const unsigned char* in, int *inlen) {
3354 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3355}
3356static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3357 const unsigned char* in, int *inlen) {
3358 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3359}
3360
3361static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3362 const unsigned char* in, int *inlen) {
3363 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3364}
3365static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3366 const unsigned char* in, int *inlen) {
3367 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3368}
3369
3370static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3371 const unsigned char* in, int *inlen) {
3372 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3373}
3374static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3375 const unsigned char* in, int *inlen) {
3376 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3377}
3378
3379static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3380 const unsigned char* in, int *inlen) {
3381 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3382}
3383static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3384 const unsigned char* in, int *inlen) {
3385 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3386}
3387
3388static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3389 const unsigned char* in, int *inlen) {
3390 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3391}
3392static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3393 const unsigned char* in, int *inlen) {
3394 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3395}
3396
3397static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3398 const unsigned char* in, int *inlen) {
3399 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3400}
3401static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3402 const unsigned char* in, int *inlen) {
3403 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3404}
3405
3406static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3407 const unsigned char* in, int *inlen) {
3408 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3409}
3410static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3411 const unsigned char* in, int *inlen) {
3412 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3413}
3414
3415static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3416 const unsigned char* in, int *inlen) {
3417 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3418}
3419static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3420 const unsigned char* in, int *inlen) {
3421 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3422}
3423
3424static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3425 const unsigned char* in, int *inlen) {
3426 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3427}
3428static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3429 const unsigned char* in, int *inlen) {
3430 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3431}
3432
3433static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3434 const unsigned char* in, int *inlen) {
3435 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3436}
3437static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3438 const unsigned char* in, int *inlen) {
3439 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3440}
3441
3442static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3443 const unsigned char* in, int *inlen) {
3444 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3445}
3446static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3447 const unsigned char* in, int *inlen) {
3448 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3449}
3450
3451static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3452 const unsigned char* in, int *inlen) {
3453 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3454}
3455static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3456 const unsigned char* in, int *inlen) {
3457 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3458}
3459
3460static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3461 const unsigned char* in, int *inlen) {
3462 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3463}
3464static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3465 const unsigned char* in, int *inlen) {
3466 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3467}
3468
3469static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3470 const unsigned char* in, int *inlen) {
3471 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3472}
3473static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3474 const unsigned char* in, int *inlen) {
3475 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3476}
3477
3478static void
3479xmlRegisterCharEncodingHandlersISO8859x (void) {
3480 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3481 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3482 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3483 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3484 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3485 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3486 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3487 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3488 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3489 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3490 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3491 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3492 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3493 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3494}
3495
3496#endif
3497#endif
3498
3499