blob: fd3623d122d67bdd32cc4f65d95c84eff78ab870 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
20 * UTF8 string routines from:
21 * "William M. Brack" <wbrack@mmm.com.hk>
22 *
23 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000024 */
25
Daniel Veillard34ce8be2002-03-18 19:37:11 +000026#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000027#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000028
Owen Taylor3473f882001-02-23 17:55:21 +000029#include <string.h>
30
31#ifdef HAVE_CTYPE_H
32#include <ctype.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
Owen Taylor3473f882001-02-23 17:55:21 +000037#ifdef LIBXML_ICONV_ENABLED
38#ifdef HAVE_ERRNO_H
39#include <errno.h>
40#endif
41#endif
42#include <libxml/encoding.h>
43#include <libxml/xmlmemory.h>
44#ifdef LIBXML_HTML_ENABLED
45#include <libxml/HTMLparser.h>
46#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000047#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000048#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049
Daniel Veillard22090732001-07-16 00:06:07 +000050static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
51static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000052
53typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
54typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
55struct _xmlCharEncodingAlias {
56 const char *name;
57 const char *alias;
58};
59
60static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
61static int xmlCharEncodingAliasesNb = 0;
62static int xmlCharEncodingAliasesMax = 0;
63
64#ifdef LIBXML_ICONV_ENABLED
65#if 0
66#define DEBUG_ENCODING /* Define this to get encoding traces */
67#endif
William M. Brack16db7b62003-08-07 13:12:49 +000068#else
69#ifdef LIBXML_ISO8859X_ENABLED
70static void xmlRegisterCharEncodingHandlersISO8859x (void);
71#endif
Owen Taylor3473f882001-02-23 17:55:21 +000072#endif
73
74static int xmlLittleEndian = 1;
75
/************************************************************************
 *                                                                      *
 *               Generic UTF8 handling routines                         *
 *                                                                      *
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
 *                                                                      *
 * UCS-4 range (hex.)      UTF-8 octet sequence (binary)                *
 * 0000 0000-0000 007F     0xxxxxxx                                     *
 * 0000 0080-0000 07FF     110xxxxx 10xxxxxx                            *
 * 0000 0800-0000 FFFF     1110xxxx 10xxxxxx 10xxxxxx                   *
 * 0001 0000-001F FFFF     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx          *
 *                                                                      *
 * Values above 0xFFFF use the 4-octet form shown in the last row.      *
 *                                                                      *
 ************************************************************************/
Owen Taylor3473f882001-02-23 17:55:21 +000090
91/**
William M. Brack4a557d92003-07-29 04:28:04 +000092 * xmlUTF8Size:
93 * @utf: pointer to the UTF8 character
94 *
William M. Brackf9415e42003-11-28 09:39:10 +000095 * calculates the internal size of a UTF8 character
William M. Brack7a821652003-08-15 07:27:40 +000096 *
William M. Brack4a557d92003-07-29 04:28:04 +000097 * returns the numbers of bytes in the character, -1 on format error
98 */
99int
100xmlUTF8Size(const xmlChar *utf) {
101 xmlChar mask;
102 int len;
103
104 if (utf == NULL)
105 return -1;
106 if (*utf < 0x80)
107 return 1;
108 /* check valid UTF8 character */
109 if (!(*utf & 0x40))
110 return -1;
111 /* determine number of bytes in char */
112 len = 2;
113 for (mask=0x20; mask != 0; mask>>=1) {
114 if (!(*utf & mask))
115 return len;
116 len++;
117 }
118 return -1;
119}
120
121/**
William M. Brack7a821652003-08-15 07:27:40 +0000122 * xmlUTF8Charcmp:
William M. Brack4a557d92003-07-29 04:28:04 +0000123 * @utf1: pointer to first UTF8 char
124 * @utf2: pointer to second UTF8 char
125 *
William M. Brack7a821652003-08-15 07:27:40 +0000126 * compares the two UCS4 values
127 *
128 * returns result of the compare as with xmlStrncmp
William M. Brack4a557d92003-07-29 04:28:04 +0000129 */
130int
131xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
132
133 if (utf1 == NULL ) {
134 if (utf2 == NULL)
135 return 0;
136 return -1;
137 }
Daniel Veillard9ff7de12003-07-29 13:30:42 +0000138 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
William M. Brack4a557d92003-07-29 04:28:04 +0000139}
140
141/**
Daniel Veillarde043ee12001-04-16 14:08:07 +0000142 * xmlUTF8Strlen:
143 * @utf: a sequence of UTF-8 encoded bytes
144 *
Daniel Veillard60087f32001-10-10 09:45:09 +0000145 * compute the length of an UTF8 string, it doesn't do a full UTF8
Daniel Veillarde043ee12001-04-16 14:08:07 +0000146 * checking of the content of the string.
147 *
148 * Returns the number of characters in the string or -1 in case of error
149 */
150int
Daniel Veillard97ac1312001-05-30 19:14:17 +0000151xmlUTF8Strlen(const xmlChar *utf) {
Daniel Veillarde043ee12001-04-16 14:08:07 +0000152 int ret = 0;
153
154 if (utf == NULL)
155 return(-1);
156
157 while (*utf != 0) {
158 if (utf[0] & 0x80) {
159 if ((utf[1] & 0xc0) != 0x80)
160 return(-1);
161 if ((utf[0] & 0xe0) == 0xe0) {
162 if ((utf[2] & 0xc0) != 0x80)
163 return(-1);
164 if ((utf[0] & 0xf0) == 0xf0) {
165 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
166 return(-1);
167 utf += 4;
168 } else {
169 utf += 3;
170 }
171 } else {
172 utf += 2;
173 }
174 } else {
175 utf++;
176 }
177 ret++;
178 }
179 return(ret);
180}
181
/**
 * xmlGetUTF8Char:
 * @utf: a sequence of UTF-8 encoded bytes
 * @len: a pointer to the number of bytes available at @utf
 *
 * Read one UTF8 Char from @utf. Sequences of 1 to 4 bytes are accepted;
 * continuation bytes are verified, overlong forms are not rejected.
 *
 * Returns the char value or -1 in case of error, and updates *len with the
 * number of bytes consumed (0 on error). A NULL @len is an error and is
 * never written through.
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    /* without @len there is nowhere to report the size: fail right away */
    if (len == NULL)
        return(-1);
    if ((utf == NULL) || (*len < 1))
        goto error;

    c = utf[0];
    if (c < 0x80) {
        /* 1-byte code */
        *len = 1;
        return(c);
    }
    /* a continuation byte (10xxxxxx) can not start a character */
    if (c < 0xc0)
        goto error;
    if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))
        goto error;
    if (c < 0xe0) {
        /* 2-byte code */
        *len = 2;
        c = (utf[0] & 0x1f) << 6;
        c |= utf[1] & 0x3f;
        return(c);
    }
    if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))
        goto error;
    if (c < 0xf0) {
        /* 3-byte code */
        *len = 3;
        c = (utf[0] & 0xf) << 12;
        c |= (utf[1] & 0x3f) << 6;
        c |= utf[2] & 0x3f;
        return(c);
    }
    /* lead bytes 11111xxx would announce more than 4 bytes */
    if ((*len < 4) || ((c & 0xf8) != 0xf0) || ((utf[3] & 0xc0) != 0x80))
        goto error;
    /* 4-byte code */
    *len = 4;
    c = (utf[0] & 0x7) << 18;
    c |= (utf[1] & 0x3f) << 12;
    c |= (utf[2] & 0x3f) << 6;
    c |= utf[3] & 0x3f;
    return(c);

error:
    *len = 0;
    return(-1);
}
248
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * The accepted shapes are:
 *    0xxxxxxx
 *    110xxxxx 10xxxxxx
 *    1110xxxx 10xxxxxx 10xxxxxx
 *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Return value: true if @utf is valid, false otherwise (including when
 * @utf is NULL).
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    unsigned char c;

    if (utf == NULL)
        return(0);

    while ((c = *utf) != 0) {
        int extra;
        int i;

        if (c < 0x80)
            extra = 0;                  /* 1-byte code */
        else if ((c & 0xe0) == 0xc0)
            extra = 1;                  /* 2-byte code */
        else if ((c & 0xf0) == 0xe0)
            extra = 2;                  /* 3-byte code */
        else if ((c & 0xf8) == 0xf0)
            extra = 3;                  /* 4-byte code */
        else
            return(0);                  /* stray continuation or 11111xxx */

        /*
         * The terminating 0 is not a continuation byte, so a truncated
         * sequence fails here before anything past the string is read.
         */
        for (i = 1; i <= extra; i++) {
            if ((utf[i] & 0xc0) != 0x80)
                return(0);
        }
        utf += extra + 1;
    }
    return(1);
}
292
293/**
Daniel Veillard97ac1312001-05-30 19:14:17 +0000294 * xmlUTF8Strsize:
295 * @utf: a sequence of UTF-8 encoded bytes
296 * @len: the number of characters in the array
297 *
298 * storage size of an UTF8 string
299 *
300 * Returns the storage size of
301 * the first 'len' characters of ARRAY
302 *
303 */
304
305int
306xmlUTF8Strsize(const xmlChar *utf, int len) {
307 const xmlChar *ptr=utf;
308 xmlChar ch;
309
310 if (len <= 0)
311 return(0);
312
313 while ( len-- > 0) {
314 if ( !*ptr )
315 break;
316 if ( (ch = *ptr++) & 0x80)
317 while ( (ch<<=1) & 0x80 )
318 ptr++;
319 }
320 return (ptr - utf);
321}
322
323
324/**
325 * xmlUTF8Strndup:
326 * @utf: the input UTF8 *
327 * @len: the len of @utf (in chars)
328 *
329 * a strndup for array of UTF8's
330 *
331 * Returns a new UTF8 * or NULL
332 */
333xmlChar *
334xmlUTF8Strndup(const xmlChar *utf, int len) {
335 xmlChar *ret;
336 int i;
337
338 if ((utf == NULL) || (len < 0)) return(NULL);
339 i = xmlUTF8Strsize(utf, len);
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000340 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
Daniel Veillard97ac1312001-05-30 19:14:17 +0000341 if (ret == NULL) {
342 xmlGenericError(xmlGenericErrorContext,
343 "malloc of %ld byte failed\n",
344 (len + 1) * (long)sizeof(xmlChar));
345 return(NULL);
346 }
347 memcpy(ret, utf, i * sizeof(xmlChar));
348 ret[i] = 0;
349 return(ret);
350}
351
352/**
353 * xmlUTF8Strpos:
354 * @utf: the input UTF8 *
355 * @pos: the position of the desired UTF8 char (in chars)
356 *
357 * a function to provide the equivalent of fetching a
358 * character from a string array
359 *
360 * Returns a pointer to the UTF8 character or NULL
361 */
362xmlChar *
363xmlUTF8Strpos(const xmlChar *utf, int pos) {
364 xmlChar ch;
365
366 if (utf == NULL) return(NULL);
367 if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
368 return(NULL);
369 while (pos--) {
370 if ((ch=*utf++) == 0) return(NULL);
371 if ( ch & 0x80 ) {
372 /* if not simple ascii, verify proper format */
373 if ( (ch & 0xc0) != 0xc0 )
374 return(NULL);
375 /* then skip over remaining bytes for this char */
376 while ( (ch <<= 1) & 0x80 )
377 if ( (*utf++ & 0xc0) != 0x80 )
378 return(NULL);
379 }
380 }
381 return((xmlChar *)utf);
382}
383
384/**
385 * xmlUTF8Strloc:
386 * @utf: the input UTF8 *
387 * @utfchar: the UTF8 character to be found
388 *
William M. Brackf9415e42003-11-28 09:39:10 +0000389 * a function to provide the relative location of a UTF8 char
Daniel Veillard97ac1312001-05-30 19:14:17 +0000390 *
391 * Returns the relative character position of the desired char
392 * or -1 if not found
393 */
394int
395xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
396 int i, size;
397 xmlChar ch;
398
399 if (utf==NULL || utfchar==NULL) return -1;
400 size = xmlUTF8Strsize(utfchar, 1);
401 for(i=0; (ch=*utf) != 0; i++) {
402 if (xmlStrncmp(utf, utfchar, size)==0)
403 return(i);
404 utf++;
405 if ( ch & 0x80 ) {
406 /* if not simple ascii, verify proper format */
407 if ( (ch & 0xc0) != 0xc0 )
408 return(-1);
409 /* then skip over remaining bytes for this char */
410 while ( (ch <<= 1) & 0x80 )
411 if ( (*utf++ & 0xc0) != 0x80 )
412 return(-1);
413 }
414 }
415
416 return(-1);
417}
418/**
419 * xmlUTF8Strsub:
420 * @utf: a sequence of UTF-8 encoded bytes
Daniel Veillard97ac1312001-05-30 19:14:17 +0000421 * @start: relative pos of first char
422 * @len: total number to copy
423 *
William M. Brackf9415e42003-11-28 09:39:10 +0000424 * Create a substring from a given UTF-8 string
Daniel Veillard97ac1312001-05-30 19:14:17 +0000425 * Note: positions are given in units of UTF-8 chars
426 *
427 * Returns a pointer to a newly created string
428 * or NULL if any problem
429 */
430
431xmlChar *
432xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
433 int i;
434 xmlChar ch;
435
436 if (utf == NULL) return(NULL);
437 if (start < 0) return(NULL);
438 if (len < 0) return(NULL);
439
440 /*
441 * Skip over any leading chars
442 */
443 for (i = 0;i < start;i++) {
444 if ((ch=*utf++) == 0) return(NULL);
445 if ( ch & 0x80 ) {
446 /* if not simple ascii, verify proper format */
447 if ( (ch & 0xc0) != 0xc0 )
448 return(NULL);
449 /* then skip over remaining bytes for this char */
450 while ( (ch <<= 1) & 0x80 )
451 if ( (*utf++ & 0xc0) != 0x80 )
452 return(NULL);
453 }
454 }
455
456 return(xmlUTF8Strndup(utf, len));
457}
458
459/************************************************************************
460 * *
461 * Conversions To/From UTF8 encoding *
462 * *
463 ************************************************************************/
464
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII bytes are copied unchanged; conversion stops
 * at the first byte >= 0x80.
 * Returns 0 if success, or -1 if a non-ASCII byte was met
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 *
 * Note: a byte is only converted while more than 5 octets of @out remain
 * free, so the last octets of the output buffer are never filled.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    const unsigned char* inend = in + (*inlen);
    int ret = 0;

    /* the 5 octets margin makes any further output bound check useless */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted so far */
            ret = -1;
            break;
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(ret);
}
515
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000516#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. A NULL @in is an initialization call: nothing is
 * produced and 0 is returned.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character that does not fit in ASCII)
 * The value of @inlen after return is the number of octets consumed;
 * a sequence truncated at the end of @in, or a character for which no
 * room is left in @out, is not consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in Ascii */
            ret = -2;
            goto done;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * not a continuation byte: fail instead of emitting the
                 * partially decoded value, as UTF8Toisolat1 does
                 */
                ret = -2;
                goto done;
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            ret = -2;
            goto done;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(ret);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000599#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000600
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, the others become a
 * 2-byte UTF-8 sequence. Conversion stops when either buffer is used up.
 * Returns 0 if success, or -1 if one of the arguments is NULL
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instop;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);
    instop = inend;

    /* every dereference of @in below is preceded by a bound check */
    while ((in < inend) && (outend - out >= 2)) {
        if (*in >= 0x80) {
            *out++ = ((*in >> 6) & 0x1F) | 0xC0;
            *out++ = (*in & 0x3F) | 0x80;
            ++in;
        }
        /* fast path: a run of ASCII limited by the room left in @out */
        if (instop - in > outend - out)
            instop = in + (outend - out);
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* a single octet of room left is still enough for one ASCII char */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(0);
}
650
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @in in UTF-8 chars
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *outlen and *inlenb, without looking at character
 * boundaries.
 *
 * Returns 0 on success, or -1 if an argument is NULL or the number of
 * bytes to copy is negative.
 * On success both *outlen and *inlenb are set to the number of octets
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL))
        return(-1);
    if ((outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy whichever is smaller: what is available or what fits */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(0);
}
686
Daniel Veillarde72c7562002-05-31 09:47:30 +0000687
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000688#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is an initialization call: nothing is
 * produced and 0 is returned.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character above 0xFF)
 * The value of @inlen after return is the number of octets consumed;
 * a sequence truncated at the end of @in, or a character for which no
 * room is left in @out, is not consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* instart = in;
    const unsigned char* outstart = out;
    const unsigned char* consumed = in;
    const unsigned char* inend;
    const unsigned char* outend;
    unsigned int value, byte;
    int extra;
    int status = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);

    while (in < inend) {
        byte = *in++;
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if ((byte < 0xC0) || (byte >= 0xF8)) {
            /*
             * trailing byte in leading position, or a lead byte with
             * no chance in IsoLat1
             */
            status = -2;
            goto done;
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else {
            value = byte & 0x07;
            extra = 3;
        }

        /* sequence cut by the end of the input: leave it unconsumed */
        if (inend - in < extra)
            goto done;

        /* the test above guarantees that @extra bytes are available */
        while (extra > 0) {
            byte = *in++;
            if ((byte & 0xC0) != 0x80) {
                status = -2;
                goto done;
            }
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }

        /* assertion: value is a single UCS-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            goto done;
        }
        if (out >= outend)
            goto done;
        *out++ = value;
        consumed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = consumed - instart;
    return(status);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000776#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000777
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so the
 * result depends neither on the byte order of the machine nor on the
 * alignment of @inb. An odd trailing byte is ignored.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 * which is not followed by a low surrogate)
 * The value of *inlenb after return is the number of octets consumed;
 * a high surrogate cut off by the end of the input is not consumed.
 * The value of *outlen after return is the number of octets produced.
 *
 * Note: a character is only converted while more than 5 octets of @out
 * remain free, so the last octets of the output buffer are never filled.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;
    /*
     * The 5 octets margin guarantees room for the longest (4 bytes)
     * UTF-8 sequence, hence no further output bound check below.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = in[0] | (((unsigned int) in[1]) << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* wait for the second half */
                break;
            }
            d = in[0] | (((unsigned int) in[1]) << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* continuation bytes, 6 bits at a time */
        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
865
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000866#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is stored one byte at a time, so the
 * result depends neither on the byte order of the machine nor on the
 * alignment of @outb. A NULL @in is an initialization call: nothing is
 * produced (UTF16LE encoding has no BOM) and 0 is returned.
 *
 * Returns 0 on success, or -2 if the transcoding failed (malformed UTF-8)
 * The value of *inlen after return is the number of octets consumed;
 * a sequence truncated at the end of @in, or a character for which no
 * room is left in @outb, is not consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;
    int ret = 0;

    /* UTF16LE encoding has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            goto done;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * not a continuation byte: fail instead of emitting the
                 * partially decoded value, as UTF8Toisolat1 does
                 */
                ret = -2;
                goto done;
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        }
        else if (c < 0x110000) {
            /* a surrogate pair needs two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (high & 0xFF);
            *out++ = (unsigned char) (high >> 8);
            *out++ = (unsigned char) (low & 0xFF);
            *out++ = (unsigned char) (low >> 8);
        }
        else
            break;      /* beyond the UTF-16 range: stop, left unconsumed */
        processed = in;
    }

done:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(ret);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000973#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000974
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. A NULL @in is an initialization call which writes
 * the little endian Byte Order Mark (0xFF 0xFE) when @outb has room for
 * it; any other call is handed over unchanged to UTF8ToUTF16LE().
 *
 * Returns 2 when the Byte Order Mark was written, 0 when the
 * initialization call had no room for it, otherwise the result of
 * UTF8ToUTF16LE().
 *
 * NOTE(review): UTF8ToUTF16LE() is only compiled under
 * LIBXML_OUTPUT_ENABLED while this function is not guarded -- confirm
 * the build with output support disabled.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* regular conversion call */
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
1013
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of big-endian UTF-16 code units in and try to convert it
 * to an UTF-8 block of chars out. The input is read byte by byte, so
 * the result does not depend on the endianness of the machine nor on
 * the alignment of @inb.
 *
 * Conversion stops, without error, when the next character does not
 * fit completely in @out or when the input ends in the middle of a
 * surrogate pair; the unconverted input is then left unconsumed.
 *
 * Returns 0 if the conversion went fine (possibly partially), or -2
 *     if the transcoding fails (a high surrogate is not followed by a
 *     low surrogate).
 * The value of *@outlen after return is the number of bytes written
 * and the value of *@inlenb is the number of input bytes consumed.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits, needed;

    /* a dangling odd byte cannot form a 16-bit unit, leave it out */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        /* big-endian: most significant byte first */
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /*
                 * the pair is split across the end of the input, wait
                 * for the low surrogate instead of raising an error
                 */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated UTF-8 sequence */
        if (outend - out < needed)
            break;

        if      (c <    0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c <   0x800) {
            *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }

        /* continuation bytes, 6 bits each, most significant first */
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
1105
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001106#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte, most
 * significant byte first, so the result does not depend on the
 * endianness of the machine nor on the alignment of @outb.
 *
 * Conversion stops, without error, when the next character does not
 * fit in @outb, when the input ends in the middle of a multi-byte
 * sequence, or when the decoded value is above 0x10FFFF; the
 * unconverted input is then left unconsumed.
 *
 * Returns 0 if the conversion went fine (possibly partially), or -2
 *     if the transcoding failed (invalid leading or continuation byte).
 * The value of *@outlen after return is the number of bytes written
 * and the value of *@inlen is the number of input bytes consumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  {
            /* trailing byte in leading position */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* not a continuation byte, the sequence is malformed */
                *outlen = out - outb;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2) break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) c;
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4) break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) hi;
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) lo;
        }
        else
            break;
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(0);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001210#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001211
Daniel Veillard97ac1312001-05-30 19:14:17 +00001212/************************************************************************
1213 * *
1214 * Generic encoding handling routines *
1215 * *
1216 ************************************************************************/
1217
Owen Taylor3473f882001-02-23 17:55:21 +00001218/**
1219 * xmlDetectCharEncoding:
1220 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +00001221 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +00001222 * @len: pointer to the length of the buffer
1223 *
1224 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +00001225 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +00001226 *
1227 * Returns one of the XML_CHAR_ENCODING_... values.
1228 */
1229xmlCharEncoding
1230xmlDetectCharEncoding(const unsigned char* in, int len)
1231{
1232 if (len >= 4) {
1233 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1234 (in[2] == 0x00) && (in[3] == 0x3C))
1235 return(XML_CHAR_ENCODING_UCS4BE);
1236 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
1237 (in[2] == 0x00) && (in[3] == 0x00))
1238 return(XML_CHAR_ENCODING_UCS4LE);
1239 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1240 (in[2] == 0x3C) && (in[3] == 0x00))
1241 return(XML_CHAR_ENCODING_UCS4_2143);
1242 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
1243 (in[2] == 0x00) && (in[3] == 0x00))
1244 return(XML_CHAR_ENCODING_UCS4_3412);
1245 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
1246 (in[2] == 0xA7) && (in[3] == 0x94))
1247 return(XML_CHAR_ENCODING_EBCDIC);
1248 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
1249 (in[2] == 0x78) && (in[3] == 0x6D))
1250 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +00001251 /*
1252 * Although not part of the recommendation, we also
1253 * attempt an "auto-recognition" of UTF-16LE and
1254 * UTF-16BE encodings.
1255 */
1256 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
1257 (in[2] == 0x3F) && (in[3] == 0x00))
1258 return(XML_CHAR_ENCODING_UTF16LE);
1259 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
1260 (in[2] == 0x00) && (in[3] == 0x3F))
1261 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +00001262 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001263 if (len >= 3) {
1264 /*
1265 * Errata on XML-1.0 June 20 2001
1266 * We now allow an UTF8 encoded BOM
1267 */
1268 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1269 (in[2] == 0xBF))
1270 return(XML_CHAR_ENCODING_UTF8);
1271 }
William M. Brackf9415e42003-11-28 09:39:10 +00001272 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +00001273 if (len >= 2) {
1274 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1275 return(XML_CHAR_ENCODING_UTF16BE);
1276 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1277 return(XML_CHAR_ENCODING_UTF16LE);
1278 }
1279 return(XML_CHAR_ENCODING_NONE);
1280}
1281
1282/**
1283 * xmlCleanupEncodingAliases:
1284 *
1285 * Unregisters all aliases
1286 */
1287void
1288xmlCleanupEncodingAliases(void) {
1289 int i;
1290
1291 if (xmlCharEncodingAliases == NULL)
1292 return;
1293
1294 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1295 if (xmlCharEncodingAliases[i].name != NULL)
1296 xmlFree((char *) xmlCharEncodingAliases[i].name);
1297 if (xmlCharEncodingAliases[i].alias != NULL)
1298 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1299 }
1300 xmlCharEncodingAliasesNb = 0;
1301 xmlCharEncodingAliasesMax = 0;
1302 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +00001303 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001304}
1305
1306/**
1307 * xmlGetEncodingAlias:
1308 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1309 *
1310 * Lookup an encoding name for the given alias.
1311 *
William M. Brackf9415e42003-11-28 09:39:10 +00001312 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +00001313 */
1314const char *
1315xmlGetEncodingAlias(const char *alias) {
1316 int i;
1317 char upper[100];
1318
1319 if (alias == NULL)
1320 return(NULL);
1321
1322 if (xmlCharEncodingAliases == NULL)
1323 return(NULL);
1324
1325 for (i = 0;i < 99;i++) {
1326 upper[i] = toupper(alias[i]);
1327 if (upper[i] == 0) break;
1328 }
1329 upper[i] = 0;
1330
1331 /*
1332 * Walk down the list looking for a definition of the alias
1333 */
1334 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1335 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1336 return(xmlCharEncodingAliases[i].name);
1337 }
1338 }
1339 return(NULL);
1340}
1341
1342/**
1343 * xmlAddEncodingAlias:
1344 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1345 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1346 *
William M. Brackf9415e42003-11-28 09:39:10 +00001347 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +00001348 * will be overwritten.
1349 *
1350 * Returns 0 in case of success, -1 in case of error
1351 */
1352int
1353xmlAddEncodingAlias(const char *name, const char *alias) {
1354 int i;
1355 char upper[100];
1356
1357 if ((name == NULL) || (alias == NULL))
1358 return(-1);
1359
1360 for (i = 0;i < 99;i++) {
1361 upper[i] = toupper(alias[i]);
1362 if (upper[i] == 0) break;
1363 }
1364 upper[i] = 0;
1365
1366 if (xmlCharEncodingAliases == NULL) {
1367 xmlCharEncodingAliasesNb = 0;
1368 xmlCharEncodingAliasesMax = 20;
1369 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1370 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1371 if (xmlCharEncodingAliases == NULL)
1372 return(-1);
1373 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1374 xmlCharEncodingAliasesMax *= 2;
1375 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1376 xmlRealloc(xmlCharEncodingAliases,
1377 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1378 }
1379 /*
1380 * Walk down the list looking for a definition of the alias
1381 */
1382 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1383 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1384 /*
1385 * Replace the definition.
1386 */
1387 xmlFree((char *) xmlCharEncodingAliases[i].name);
1388 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1389 return(0);
1390 }
1391 }
1392 /*
1393 * Add the definition
1394 */
1395 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1396 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1397 xmlCharEncodingAliasesNb++;
1398 return(0);
1399}
1400
1401/**
1402 * xmlDelEncodingAlias:
1403 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1404 *
1405 * Unregisters an encoding alias @alias
1406 *
1407 * Returns 0 in case of success, -1 in case of error
1408 */
1409int
1410xmlDelEncodingAlias(const char *alias) {
1411 int i;
1412
1413 if (alias == NULL)
1414 return(-1);
1415
1416 if (xmlCharEncodingAliases == NULL)
1417 return(-1);
1418 /*
1419 * Walk down the list looking for a definition of the alias
1420 */
1421 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1422 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1423 xmlFree((char *) xmlCharEncodingAliases[i].name);
1424 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1425 xmlCharEncodingAliasesNb--;
1426 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1427 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1428 return(0);
1429 }
1430 }
1431 return(-1);
1432}
1433
1434/**
1435 * xmlParseCharEncoding:
1436 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1437 *
William M. Brackf9415e42003-11-28 09:39:10 +00001438 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001439 * that the comparison is case insensitive accordingly to the section
1440 * [XML] 4.3.3 Character Encoding in Entities.
1441 *
1442 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1443 * if not recognized.
1444 */
1445xmlCharEncoding
1446xmlParseCharEncoding(const char* name)
1447{
1448 const char *alias;
1449 char upper[500];
1450 int i;
1451
1452 if (name == NULL)
1453 return(XML_CHAR_ENCODING_NONE);
1454
1455 /*
1456 * Do the alias resolution
1457 */
1458 alias = xmlGetEncodingAlias(name);
1459 if (alias != NULL)
1460 name = alias;
1461
1462 for (i = 0;i < 499;i++) {
1463 upper[i] = toupper(name[i]);
1464 if (upper[i] == 0) break;
1465 }
1466 upper[i] = 0;
1467
1468 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1469 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1470 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1471
1472 /*
1473 * NOTE: if we were able to parse this, the endianness of UTF16 is
1474 * already found and in use
1475 */
1476 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1477 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1478
1479 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1480 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1481 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1482
1483 /*
1484 * NOTE: if we were able to parse this, the endianness of UCS4 is
1485 * already found and in use
1486 */
1487 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1488 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1489 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1490
1491
1492 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1493 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1494 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1495
1496 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1497 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1498 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1499
1500 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1501 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1502 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1503 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1504 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1505 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1506 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1507
1508 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1509 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1510 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1511
1512#ifdef DEBUG_ENCODING
1513 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1514#endif
1515 return(XML_CHAR_ENCODING_ERROR);
1516}
1517
1518/**
1519 * xmlGetCharEncodingName:
1520 * @enc: the encoding
1521 *
1522 * The "canonical" name for XML encoding.
1523 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1524 * Section 4.3.3 Character Encoding in Entities
1525 *
1526 * Returns the canonical name for the given encoding
1527 */
1528
1529const char*
1530xmlGetCharEncodingName(xmlCharEncoding enc) {
1531 switch (enc) {
1532 case XML_CHAR_ENCODING_ERROR:
1533 return(NULL);
1534 case XML_CHAR_ENCODING_NONE:
1535 return(NULL);
1536 case XML_CHAR_ENCODING_UTF8:
1537 return("UTF-8");
1538 case XML_CHAR_ENCODING_UTF16LE:
1539 return("UTF-16");
1540 case XML_CHAR_ENCODING_UTF16BE:
1541 return("UTF-16");
1542 case XML_CHAR_ENCODING_EBCDIC:
1543 return("EBCDIC");
1544 case XML_CHAR_ENCODING_UCS4LE:
1545 return("ISO-10646-UCS-4");
1546 case XML_CHAR_ENCODING_UCS4BE:
1547 return("ISO-10646-UCS-4");
1548 case XML_CHAR_ENCODING_UCS4_2143:
1549 return("ISO-10646-UCS-4");
1550 case XML_CHAR_ENCODING_UCS4_3412:
1551 return("ISO-10646-UCS-4");
1552 case XML_CHAR_ENCODING_UCS2:
1553 return("ISO-10646-UCS-2");
1554 case XML_CHAR_ENCODING_8859_1:
1555 return("ISO-8859-1");
1556 case XML_CHAR_ENCODING_8859_2:
1557 return("ISO-8859-2");
1558 case XML_CHAR_ENCODING_8859_3:
1559 return("ISO-8859-3");
1560 case XML_CHAR_ENCODING_8859_4:
1561 return("ISO-8859-4");
1562 case XML_CHAR_ENCODING_8859_5:
1563 return("ISO-8859-5");
1564 case XML_CHAR_ENCODING_8859_6:
1565 return("ISO-8859-6");
1566 case XML_CHAR_ENCODING_8859_7:
1567 return("ISO-8859-7");
1568 case XML_CHAR_ENCODING_8859_8:
1569 return("ISO-8859-8");
1570 case XML_CHAR_ENCODING_8859_9:
1571 return("ISO-8859-9");
1572 case XML_CHAR_ENCODING_2022_JP:
1573 return("ISO-2022-JP");
1574 case XML_CHAR_ENCODING_SHIFT_JIS:
1575 return("Shift-JIS");
1576 case XML_CHAR_ENCODING_EUC_JP:
1577 return("EUC-JP");
1578 case XML_CHAR_ENCODING_ASCII:
1579 return(NULL);
1580 }
1581 return(NULL);
1582}
1583
Daniel Veillard97ac1312001-05-30 19:14:17 +00001584/************************************************************************
1585 * *
1586 * Char encoding handlers *
1587 * *
1588 ************************************************************************/
1589
Owen Taylor3473f882001-02-23 17:55:21 +00001590
/*
 * Registry of the char encoding handlers.
 * the size should be growable, but it's not a big deal ...
 * MAX_ENCODING_HANDLERS is the number of slots allocated for the
 * handlers array by xmlInitCharEncodingHandlers(), handlers stays NULL
 * until then, and nbCharEncodingHandler is the number of slots in use.
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the handler returned by xmlFindCharEncodingHandler() for a
 * NULL or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1602
1603/**
1604 * xmlNewCharEncodingHandler:
1605 * @name: the encoding name, in UTF-8 format (ASCII actually)
1606 * @input: the xmlCharEncodingInputFunc to read that encoding
1607 * @output: the xmlCharEncodingOutputFunc to write that encoding
1608 *
1609 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001610 *
Owen Taylor3473f882001-02-23 17:55:21 +00001611 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1612 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001613xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001614xmlNewCharEncodingHandler(const char *name,
1615 xmlCharEncodingInputFunc input,
1616 xmlCharEncodingOutputFunc output) {
1617 xmlCharEncodingHandlerPtr handler;
1618 const char *alias;
1619 char upper[500];
1620 int i;
1621 char *up = 0;
1622
1623 /*
1624 * Do the alias resolution
1625 */
1626 alias = xmlGetEncodingAlias(name);
1627 if (alias != NULL)
1628 name = alias;
1629
1630 /*
1631 * Keep only the uppercase version of the encoding.
1632 */
1633 if (name == NULL) {
1634 xmlGenericError(xmlGenericErrorContext,
1635 "xmlNewCharEncodingHandler : no name !\n");
1636 return(NULL);
1637 }
1638 for (i = 0;i < 499;i++) {
1639 upper[i] = toupper(name[i]);
1640 if (upper[i] == 0) break;
1641 }
1642 upper[i] = 0;
1643 up = xmlMemStrdup(upper);
1644 if (up == NULL) {
1645 xmlGenericError(xmlGenericErrorContext,
1646 "xmlNewCharEncodingHandler : out of memory !\n");
1647 return(NULL);
1648 }
1649
1650 /*
1651 * allocate and fill-up an handler block.
1652 */
1653 handler = (xmlCharEncodingHandlerPtr)
1654 xmlMalloc(sizeof(xmlCharEncodingHandler));
1655 if (handler == NULL) {
1656 xmlGenericError(xmlGenericErrorContext,
1657 "xmlNewCharEncodingHandler : out of memory !\n");
1658 return(NULL);
1659 }
1660 handler->input = input;
1661 handler->output = output;
1662 handler->name = up;
1663
1664#ifdef LIBXML_ICONV_ENABLED
1665 handler->iconv_in = NULL;
1666 handler->iconv_out = NULL;
1667#endif /* LIBXML_ICONV_ENABLED */
1668
1669 /*
1670 * registers and returns the handler.
1671 */
1672 xmlRegisterCharEncodingHandler(handler);
1673#ifdef DEBUG_ENCODING
1674 xmlGenericError(xmlGenericErrorContext,
1675 "Registered encoding handler for %s\n", name);
1676#endif
1677 return(handler);
1678}
1679
1680/**
1681 * xmlInitCharEncodingHandlers:
1682 *
1683 * Initialize the char encoding support, it registers the default
1684 * encoding supported.
1685 * NOTE: while public, this function usually doesn't need to be called
1686 * in normal processing.
1687 */
1688void
1689xmlInitCharEncodingHandlers(void) {
1690 unsigned short int tst = 0x1234;
1691 unsigned char *ptr = (unsigned char *) &tst;
1692
1693 if (handlers != NULL) return;
1694
1695 handlers = (xmlCharEncodingHandlerPtr *)
1696 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1697
1698 if (*ptr == 0x12) xmlLittleEndian = 0;
1699 else if (*ptr == 0x34) xmlLittleEndian = 1;
1700 else xmlGenericError(xmlGenericErrorContext,
1701 "Odd problem at endianness detection\n");
1702
1703 if (handlers == NULL) {
1704 xmlGenericError(xmlGenericErrorContext,
1705 "xmlInitCharEncodingHandlers : out of memory !\n");
1706 return;
1707 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001708 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001709#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001710 xmlUTF16LEHandler =
1711 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1712 xmlUTF16BEHandler =
1713 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001714 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001715 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1716 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001717 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001718#ifdef LIBXML_HTML_ENABLED
1719 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1720#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001721#else
1722 xmlUTF16LEHandler =
1723 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1724 xmlUTF16BEHandler =
1725 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001726 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001727 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1728 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1729 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1730#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001731#ifndef LIBXML_ICONV_ENABLED
1732#ifdef LIBXML_ISO8859X_ENABLED
1733 xmlRegisterCharEncodingHandlersISO8859x ();
1734#endif
1735#endif
1736
Owen Taylor3473f882001-02-23 17:55:21 +00001737}
1738
1739/**
1740 * xmlCleanupCharEncodingHandlers:
1741 *
1742 * Cleanup the memory allocated for the char encoding support, it
1743 * unregisters all the encoding handlers and the aliases.
1744 */
1745void
1746xmlCleanupCharEncodingHandlers(void) {
1747 xmlCleanupEncodingAliases();
1748
1749 if (handlers == NULL) return;
1750
1751 for (;nbCharEncodingHandler > 0;) {
1752 nbCharEncodingHandler--;
1753 if (handlers[nbCharEncodingHandler] != NULL) {
1754 if (handlers[nbCharEncodingHandler]->name != NULL)
1755 xmlFree(handlers[nbCharEncodingHandler]->name);
1756 xmlFree(handlers[nbCharEncodingHandler]);
1757 }
1758 }
1759 xmlFree(handlers);
1760 handlers = NULL;
1761 nbCharEncodingHandler = 0;
1762 xmlDefaultCharEncodingHandler = NULL;
1763}
1764
1765/**
1766 * xmlRegisterCharEncodingHandler:
1767 * @handler: the xmlCharEncodingHandlerPtr handler block
1768 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001769 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001770 */
1771void
1772xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1773 if (handlers == NULL) xmlInitCharEncodingHandlers();
1774 if (handler == NULL) {
1775 xmlGenericError(xmlGenericErrorContext,
1776 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1777 return;
1778 }
1779
1780 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1781 xmlGenericError(xmlGenericErrorContext,
1782 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1783 xmlGenericError(xmlGenericErrorContext,
1784 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1785 return;
1786 }
1787 handlers[nbCharEncodingHandler++] = handler;
1788}
1789
1790/**
1791 * xmlGetCharEncodingHandler:
1792 * @enc: an xmlCharEncoding value.
1793 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001794 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001795 *
1796 * Returns the handler or NULL if not found
1797 */
1798xmlCharEncodingHandlerPtr
1799xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1800 xmlCharEncodingHandlerPtr handler;
1801
1802 if (handlers == NULL) xmlInitCharEncodingHandlers();
1803 switch (enc) {
1804 case XML_CHAR_ENCODING_ERROR:
1805 return(NULL);
1806 case XML_CHAR_ENCODING_NONE:
1807 return(NULL);
1808 case XML_CHAR_ENCODING_UTF8:
1809 return(NULL);
1810 case XML_CHAR_ENCODING_UTF16LE:
1811 return(xmlUTF16LEHandler);
1812 case XML_CHAR_ENCODING_UTF16BE:
1813 return(xmlUTF16BEHandler);
1814 case XML_CHAR_ENCODING_EBCDIC:
1815 handler = xmlFindCharEncodingHandler("EBCDIC");
1816 if (handler != NULL) return(handler);
1817 handler = xmlFindCharEncodingHandler("ebcdic");
1818 if (handler != NULL) return(handler);
1819 break;
1820 case XML_CHAR_ENCODING_UCS4BE:
1821 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1822 if (handler != NULL) return(handler);
1823 handler = xmlFindCharEncodingHandler("UCS-4");
1824 if (handler != NULL) return(handler);
1825 handler = xmlFindCharEncodingHandler("UCS4");
1826 if (handler != NULL) return(handler);
1827 break;
1828 case XML_CHAR_ENCODING_UCS4LE:
1829 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1830 if (handler != NULL) return(handler);
1831 handler = xmlFindCharEncodingHandler("UCS-4");
1832 if (handler != NULL) return(handler);
1833 handler = xmlFindCharEncodingHandler("UCS4");
1834 if (handler != NULL) return(handler);
1835 break;
1836 case XML_CHAR_ENCODING_UCS4_2143:
1837 break;
1838 case XML_CHAR_ENCODING_UCS4_3412:
1839 break;
1840 case XML_CHAR_ENCODING_UCS2:
1841 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1842 if (handler != NULL) return(handler);
1843 handler = xmlFindCharEncodingHandler("UCS-2");
1844 if (handler != NULL) return(handler);
1845 handler = xmlFindCharEncodingHandler("UCS2");
1846 if (handler != NULL) return(handler);
1847 break;
1848
1849 /*
1850 * We used to keep ISO Latin encodings native in the
1851 * generated data. This led to so many problems that
1852 * this has been removed. One can still change this
1853 * back by registering no-ops encoders for those
1854 */
1855 case XML_CHAR_ENCODING_8859_1:
1856 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1857 if (handler != NULL) return(handler);
1858 break;
1859 case XML_CHAR_ENCODING_8859_2:
1860 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1861 if (handler != NULL) return(handler);
1862 break;
1863 case XML_CHAR_ENCODING_8859_3:
1864 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1865 if (handler != NULL) return(handler);
1866 break;
1867 case XML_CHAR_ENCODING_8859_4:
1868 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1869 if (handler != NULL) return(handler);
1870 break;
1871 case XML_CHAR_ENCODING_8859_5:
1872 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1873 if (handler != NULL) return(handler);
1874 break;
1875 case XML_CHAR_ENCODING_8859_6:
1876 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1877 if (handler != NULL) return(handler);
1878 break;
1879 case XML_CHAR_ENCODING_8859_7:
1880 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1881 if (handler != NULL) return(handler);
1882 break;
1883 case XML_CHAR_ENCODING_8859_8:
1884 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1885 if (handler != NULL) return(handler);
1886 break;
1887 case XML_CHAR_ENCODING_8859_9:
1888 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1889 if (handler != NULL) return(handler);
1890 break;
1891
1892
1893 case XML_CHAR_ENCODING_2022_JP:
1894 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1895 if (handler != NULL) return(handler);
1896 break;
1897 case XML_CHAR_ENCODING_SHIFT_JIS:
1898 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1899 if (handler != NULL) return(handler);
1900 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1901 if (handler != NULL) return(handler);
1902 handler = xmlFindCharEncodingHandler("Shift_JIS");
1903 if (handler != NULL) return(handler);
1904 break;
1905 case XML_CHAR_ENCODING_EUC_JP:
1906 handler = xmlFindCharEncodingHandler("EUC-JP");
1907 if (handler != NULL) return(handler);
1908 break;
1909 default:
1910 break;
1911 }
1912
1913#ifdef DEBUG_ENCODING
1914 xmlGenericError(xmlGenericErrorContext,
1915 "No handler found for encoding %d\n", enc);
1916#endif
1917 return(NULL);
1918}
1919
1920/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001921 * xmlFindCharEncodingHandler:
1922 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001923 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001924 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001925 *
1926 * Returns the handler or NULL if not found
1927 */
1928xmlCharEncodingHandlerPtr
1929xmlFindCharEncodingHandler(const char *name) {
1930 const char *nalias;
1931 const char *norig;
1932 xmlCharEncoding alias;
1933#ifdef LIBXML_ICONV_ENABLED
1934 xmlCharEncodingHandlerPtr enc;
1935 iconv_t icv_in, icv_out;
1936#endif /* LIBXML_ICONV_ENABLED */
1937 char upper[100];
1938 int i;
1939
1940 if (handlers == NULL) xmlInitCharEncodingHandlers();
1941 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1942 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1943
1944 /*
1945 * Do the alias resolution
1946 */
1947 norig = name;
1948 nalias = xmlGetEncodingAlias(name);
1949 if (nalias != NULL)
1950 name = nalias;
1951
1952 /*
1953 * Check first for directly registered encoding names
1954 */
1955 for (i = 0;i < 99;i++) {
1956 upper[i] = toupper(name[i]);
1957 if (upper[i] == 0) break;
1958 }
1959 upper[i] = 0;
1960
1961 for (i = 0;i < nbCharEncodingHandler; i++)
1962 if (!strcmp(upper, handlers[i]->name)) {
1963#ifdef DEBUG_ENCODING
1964 xmlGenericError(xmlGenericErrorContext,
1965 "Found registered handler for encoding %s\n", name);
1966#endif
1967 return(handlers[i]);
1968 }
1969
1970#ifdef LIBXML_ICONV_ENABLED
1971 /* check whether iconv can handle this */
1972 icv_in = iconv_open("UTF-8", name);
1973 icv_out = iconv_open(name, "UTF-8");
1974 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1975 enc = (xmlCharEncodingHandlerPtr)
1976 xmlMalloc(sizeof(xmlCharEncodingHandler));
1977 if (enc == NULL) {
1978 iconv_close(icv_in);
1979 iconv_close(icv_out);
1980 return(NULL);
1981 }
1982 enc->name = xmlMemStrdup(name);
1983 enc->input = NULL;
1984 enc->output = NULL;
1985 enc->iconv_in = icv_in;
1986 enc->iconv_out = icv_out;
1987#ifdef DEBUG_ENCODING
1988 xmlGenericError(xmlGenericErrorContext,
1989 "Found iconv handler for encoding %s\n", name);
1990#endif
1991 return enc;
1992 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1993 xmlGenericError(xmlGenericErrorContext,
1994 "iconv : problems with filters for '%s'\n", name);
1995 }
1996#endif /* LIBXML_ICONV_ENABLED */
1997
1998#ifdef DEBUG_ENCODING
1999 xmlGenericError(xmlGenericErrorContext,
2000 "No handler found for encoding %s\n", name);
2001#endif
2002
2003 /*
2004 * Fallback using the canonical names
2005 */
2006 alias = xmlParseCharEncoding(norig);
2007 if (alias != XML_CHAR_ENCODING_ERROR) {
2008 const char* canon;
2009 canon = xmlGetCharEncodingName(alias);
2010 if ((canon != NULL) && (strcmp(name, canon))) {
2011 return(xmlFindCharEncodingHandler(canon));
2012 }
2013 }
2014
William M. Brackf9415e42003-11-28 09:39:10 +00002015 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00002016 return(NULL);
2017}
2018
/************************************************************************
 *                                                                      *
 *              ICONV based generic conversion functions                *
 *                                                                      *
 ************************************************************************/
2024
Owen Taylor3473f882001-02-23 17:55:21 +00002025#ifdef LIBXML_ICONV_ENABLED
2026/**
2027 * xmlIconvWrapper:
2028 * @cd: iconv converter data structure
2029 * @out: a pointer to an array of bytes to store the result
2030 * @outlen: the length of @out
2031 * @in: a pointer to an array of ISO Latin 1 chars
2032 * @inlen: the length of @in
2033 *
2034 * Returns 0 if success, or
2035 * -1 by lack of space, or
2036 * -2 if the transcoding fails (for *in is not valid utf8 string or
2037 * the result of transformation can't fit into the encoding we want), or
2038 * -3 if there the last byte can't form a single output char.
2039 *
2040 * The value of @inlen after return is the number of octets consumed
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002041 * as the return value is positive, else unpredictable.
Owen Taylor3473f882001-02-23 17:55:21 +00002042 * The value of @outlen after return is the number of ocetes consumed.
2043 */
2044static int
2045xmlIconvWrapper(iconv_t cd,
Daniel Veillard9403a042001-05-28 11:00:53 +00002046 unsigned char *out, int *outlen,
2047 const unsigned char *in, int *inlen) {
Owen Taylor3473f882001-02-23 17:55:21 +00002048
Daniel Veillard9403a042001-05-28 11:00:53 +00002049 size_t icv_inlen = *inlen, icv_outlen = *outlen;
2050 const char *icv_in = (const char *) in;
2051 char *icv_out = (char *) out;
2052 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002053
Darin Adler699613b2001-07-27 22:47:14 +00002054 ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
Daniel Veillard9403a042001-05-28 11:00:53 +00002055 if (in != NULL) {
2056 *inlen -= icv_inlen;
2057 *outlen -= icv_outlen;
2058 } else {
2059 *inlen = 0;
2060 *outlen = 0;
2061 }
2062 if ((icv_inlen != 0) || (ret == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002063#ifdef EILSEQ
Daniel Veillard9403a042001-05-28 11:00:53 +00002064 if (errno == EILSEQ) {
2065 return -2;
2066 } else
Owen Taylor3473f882001-02-23 17:55:21 +00002067#endif
2068#ifdef E2BIG
Daniel Veillard9403a042001-05-28 11:00:53 +00002069 if (errno == E2BIG) {
2070 return -1;
2071 } else
Owen Taylor3473f882001-02-23 17:55:21 +00002072#endif
2073#ifdef EINVAL
Daniel Veillard9403a042001-05-28 11:00:53 +00002074 if (errno == EINVAL) {
2075 return -3;
2076 } else
Owen Taylor3473f882001-02-23 17:55:21 +00002077#endif
Daniel Veillard9403a042001-05-28 11:00:53 +00002078 {
2079 return -3;
2080 }
2081 }
2082 return 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002083}
2084#endif /* LIBXML_ICONV_ENABLED */
2085
/************************************************************************
 *                                                                      *
 *      The real API used by libxml for on-the-fly conversion           *
 *                                                                      *
 ************************************************************************/
2091
Owen Taylor3473f882001-02-23 17:55:21 +00002092/**
2093 * xmlCharEncFirstLine:
2094 * @handler: char enconding transformation data structure
2095 * @out: an xmlBuffer for the output.
2096 * @in: an xmlBuffer for the input
2097 *
2098 * Front-end for the encoding handler input function, but handle only
2099 * the very first line, i.e. limit itself to 45 chars.
2100 *
2101 * Returns the number of byte written if success, or
2102 * -1 general error
2103 * -2 if the transcoding fails (for *in is not valid utf8 string or
2104 * the result of transformation can't fit into the encoding we want), or
2105 */
2106int
2107xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2108 xmlBufferPtr in) {
2109 int ret = -2;
2110 int written;
2111 int toconv;
2112
2113 if (handler == NULL) return(-1);
2114 if (out == NULL) return(-1);
2115 if (in == NULL) return(-1);
2116
2117 written = out->size - out->use;
2118 toconv = in->use;
2119 if (toconv * 2 >= written) {
2120 xmlBufferGrow(out, toconv);
2121 written = out->size - out->use - 1;
2122 }
2123
2124 /*
2125 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2126 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002127 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00002128 */
2129 written = 45;
2130
2131 if (handler->input != NULL) {
2132 ret = handler->input(&out->content[out->use], &written,
2133 in->content, &toconv);
2134 xmlBufferShrink(in, toconv);
2135 out->use += written;
2136 out->content[out->use] = 0;
2137 }
2138#ifdef LIBXML_ICONV_ENABLED
2139 else if (handler->iconv_in != NULL) {
2140 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2141 &written, in->content, &toconv);
2142 xmlBufferShrink(in, toconv);
2143 out->use += written;
2144 out->content[out->use] = 0;
2145 if (ret == -1) ret = -3;
2146 }
2147#endif /* LIBXML_ICONV_ENABLED */
2148#ifdef DEBUG_ENCODING
2149 switch (ret) {
2150 case 0:
2151 xmlGenericError(xmlGenericErrorContext,
2152 "converted %d bytes to %d bytes of input\n",
2153 toconv, written);
2154 break;
2155 case -1:
2156 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2157 toconv, written, in->use);
2158 break;
2159 case -2:
2160 xmlGenericError(xmlGenericErrorContext,
2161 "input conversion failed due to input error\n");
2162 break;
2163 case -3:
2164 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2165 toconv, written, in->use);
2166 break;
2167 default:
2168 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2169 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002170#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002171 /*
2172 * Ignore when input buffer is not on a boundary
2173 */
2174 if (ret == -3) ret = 0;
2175 if (ret == -1) ret = 0;
2176 return(ret);
2177}
2178
2179/**
2180 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002182 * @out: an xmlBuffer for the output.
2183 * @in: an xmlBuffer for the input
2184 *
2185 * Generic front-end for the encoding handler input function
2186 *
2187 * Returns the number of byte written if success, or
2188 * -1 general error
2189 * -2 if the transcoding fails (for *in is not valid utf8 string or
2190 * the result of transformation can't fit into the encoding we want), or
2191 */
2192int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002193xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2194 xmlBufferPtr in)
2195{
Owen Taylor3473f882001-02-23 17:55:21 +00002196 int ret = -2;
2197 int written;
2198 int toconv;
2199
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002200 if (handler == NULL)
2201 return (-1);
2202 if (out == NULL)
2203 return (-1);
2204 if (in == NULL)
2205 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002206
2207 toconv = in->use;
2208 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002209 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 written = out->size - out->use;
2211 if (toconv * 2 >= written) {
2212 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002213 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002214 }
2215 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002216 ret = handler->input(&out->content[out->use], &written,
2217 in->content, &toconv);
2218 xmlBufferShrink(in, toconv);
2219 out->use += written;
2220 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002221 }
2222#ifdef LIBXML_ICONV_ENABLED
2223 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002224 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2225 &written, in->content, &toconv);
2226 xmlBufferShrink(in, toconv);
2227 out->use += written;
2228 out->content[out->use] = 0;
2229 if (ret == -1)
2230 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232#endif /* LIBXML_ICONV_ENABLED */
2233 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002234 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002235#ifdef DEBUG_ENCODING
2236 xmlGenericError(xmlGenericErrorContext,
2237 "converted %d bytes to %d bytes of input\n",
2238 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002239#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002240 break;
2241 case -1:
2242#ifdef DEBUG_ENCODING
2243 xmlGenericError(xmlGenericErrorContext,
2244 "converted %d bytes to %d bytes of input, %d left\n",
2245 toconv, written, in->use);
2246#endif
2247 break;
2248 case -3:
2249#ifdef DEBUG_ENCODING
2250 xmlGenericError(xmlGenericErrorContext,
2251 "converted %d bytes to %d bytes of input, %d left\n",
2252 toconv, written, in->use);
2253#endif
2254 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002255 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002256 xmlGenericError(xmlGenericErrorContext,
2257 "input conversion failed due to input error\n");
2258 xmlGenericError(xmlGenericErrorContext,
2259 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2260 in->content[0], in->content[1],
2261 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00002262 }
2263 /*
2264 * Ignore when input buffer is not on a boundary
2265 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002266 if (ret == -3)
2267 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00002268 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00002269}
2270
2271/**
2272 * xmlCharEncOutFunc:
2273 * @handler: char enconding transformation data structure
2274 * @out: an xmlBuffer for the output.
2275 * @in: an xmlBuffer for the input
2276 *
2277 * Generic front-end for the encoding handler output function
2278 * a first call with @in == NULL has to be made firs to initiate the
2279 * output in case of non-stateless encoding needing to initiate their
2280 * state or the output (like the BOM in UTF16).
2281 * In case of UTF8 sequence conversion errors for the given encoder,
2282 * the content will be automatically remapped to a CharRef sequence.
2283 *
2284 * Returns the number of byte written if success, or
2285 * -1 general error
2286 * -2 if the transcoding fails (for *in is not valid utf8 string or
2287 * the result of transformation can't fit into the encoding we want), or
2288 */
2289int
2290xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2291 xmlBufferPtr in) {
2292 int ret = -2;
2293 int written;
2294 int writtentot = 0;
2295 int toconv;
2296 int output = 0;
2297
2298 if (handler == NULL) return(-1);
2299 if (out == NULL) return(-1);
2300
2301retry:
2302
2303 written = out->size - out->use;
2304
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002305 if (written > 0)
2306 written--; /* Gennady: count '/0' */
2307
Owen Taylor3473f882001-02-23 17:55:21 +00002308 /*
2309 * First specific handling of in = NULL, i.e. the initialization call
2310 */
2311 if (in == NULL) {
2312 toconv = 0;
2313 if (handler->output != NULL) {
2314 ret = handler->output(&out->content[out->use], &written,
2315 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002316 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002317 out->use += written;
2318 out->content[out->use] = 0;
2319 }
Owen Taylor3473f882001-02-23 17:55:21 +00002320 }
2321#ifdef LIBXML_ICONV_ENABLED
2322 else if (handler->iconv_out != NULL) {
2323 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2324 &written, NULL, &toconv);
2325 out->use += written;
2326 out->content[out->use] = 0;
2327 }
2328#endif /* LIBXML_ICONV_ENABLED */
2329#ifdef DEBUG_ENCODING
2330 xmlGenericError(xmlGenericErrorContext,
2331 "initialized encoder\n");
2332#endif
2333 return(0);
2334 }
2335
2336 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002337 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002338 */
2339 toconv = in->use;
2340 if (toconv == 0)
2341 return(0);
2342 if (toconv * 2 >= written) {
2343 xmlBufferGrow(out, toconv * 2);
2344 written = out->size - out->use - 1;
2345 }
2346 if (handler->output != NULL) {
2347 ret = handler->output(&out->content[out->use], &written,
2348 in->content, &toconv);
2349 xmlBufferShrink(in, toconv);
2350 out->use += written;
2351 writtentot += written;
2352 out->content[out->use] = 0;
2353 }
2354#ifdef LIBXML_ICONV_ENABLED
2355 else if (handler->iconv_out != NULL) {
2356 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2357 &written, in->content, &toconv);
2358 xmlBufferShrink(in, toconv);
2359 out->use += written;
2360 writtentot += written;
2361 out->content[out->use] = 0;
2362 if (ret == -1) {
2363 if (written > 0) {
2364 /*
2365 * Can be a limitation of iconv
2366 */
2367 goto retry;
2368 }
2369 ret = -3;
2370 }
2371 }
2372#endif /* LIBXML_ICONV_ENABLED */
2373 else {
2374 xmlGenericError(xmlGenericErrorContext,
2375 "xmlCharEncOutFunc: no output function !\n");
2376 return(-1);
2377 }
2378
2379 if (ret >= 0) output += ret;
2380
2381 /*
2382 * Attempt to handle error cases
2383 */
2384 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002385 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002386#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002387 xmlGenericError(xmlGenericErrorContext,
2388 "converted %d bytes to %d bytes of output\n",
2389 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002390#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002391 break;
2392 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002393#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002394 xmlGenericError(xmlGenericErrorContext,
2395 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002396#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002397 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002398 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002399#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002400 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2401 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002402#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002403 break;
2404 case -2: {
2405 int len = in->use;
2406 const xmlChar *utf = (const xmlChar *) in->content;
2407 int cur;
2408
2409 cur = xmlGetUTF8Char(utf, &len);
2410 if (cur > 0) {
2411 xmlChar charref[20];
2412
2413#ifdef DEBUG_ENCODING
2414 xmlGenericError(xmlGenericErrorContext,
2415 "handling output conversion error\n");
2416 xmlGenericError(xmlGenericErrorContext,
2417 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2418 in->content[0], in->content[1],
2419 in->content[2], in->content[3]);
2420#endif
2421 /*
2422 * Removes the UTF8 sequence, and replace it by a charref
2423 * and continue the transcoding phase, hoping the error
2424 * did not mangle the encoder state.
2425 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002426 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002427 xmlBufferShrink(in, len);
2428 xmlBufferAddHead(in, charref, -1);
2429
2430 goto retry;
2431 } else {
2432 xmlGenericError(xmlGenericErrorContext,
2433 "output conversion failed due to conv error\n");
2434 xmlGenericError(xmlGenericErrorContext,
2435 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2436 in->content[0], in->content[1],
2437 in->content[2], in->content[3]);
2438 in->content[0] = ' ';
2439 }
2440 break;
2441 }
2442 }
2443 return(ret);
2444}
2445
2446/**
2447 * xmlCharEncCloseFunc:
2448 * @handler: char enconding transformation data structure
2449 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002450 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002451 *
2452 * Returns 0 if success, or -1 in case of error
2453 */
2454int
2455xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2456 int ret = 0;
2457 if (handler == NULL) return(-1);
2458 if (handler->name == NULL) return(-1);
2459#ifdef LIBXML_ICONV_ENABLED
2460 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002462 * and the associated icon resources.
2463 */
2464 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2465 if (handler->name != NULL)
2466 xmlFree(handler->name);
2467 handler->name = NULL;
2468 if (handler->iconv_out != NULL) {
2469 if (iconv_close(handler->iconv_out))
2470 ret = -1;
2471 handler->iconv_out = NULL;
2472 }
2473 if (handler->iconv_in != NULL) {
2474 if (iconv_close(handler->iconv_in))
2475 ret = -1;
2476 handler->iconv_in = NULL;
2477 }
2478 xmlFree(handler);
2479 }
2480#endif /* LIBXML_ICONV_ENABLED */
2481#ifdef DEBUG_ENCODING
2482 if (ret)
2483 xmlGenericError(xmlGenericErrorContext,
2484 "failed to close the encoding handler\n");
2485 else
2486 xmlGenericError(xmlGenericErrorContext,
2487 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002488#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002489
Owen Taylor3473f882001-02-23 17:55:21 +00002490 return(ret);
2491}
2492
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002493#ifndef LIBXML_ICONV_ENABLED
2494#ifdef LIBXML_ISO8859X_ENABLED
2495
2496/**
2497 * UTF8ToISO8859x:
2498 * @out: a pointer to an array of bytes to store the result
2499 * @outlen: the length of @out
2500 * @in: a pointer to an array of UTF-8 chars
2501 * @inlen: the length of @in
2502 * @xlattable: the 2-level transcoding table
2503 *
2504 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2505 * block of chars out.
2506 *
2507 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2508 * The value of @inlen after return is the number of octets consumed
2509 * as the return value is positive, else unpredictable.
2510 * The value of @outlen after return is the number of ocetes consumed.
2511 */
2512static int
2513UTF8ToISO8859x(unsigned char* out, int *outlen,
2514 const unsigned char* in, int *inlen,
2515 unsigned char const *xlattable) {
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002516 const unsigned char* outstart = out;
2517 const unsigned char* inend;
2518 const unsigned char* instart = in;
2519
2520 if (in == NULL) {
2521 /*
2522 * initialization nothing to do
2523 */
2524 *outlen = 0;
2525 *inlen = 0;
2526 return(0);
2527 }
2528 inend = in + (*inlen);
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002529 while (in < inend) {
2530 unsigned char d = *in++;
2531 if (d < 0x80) {
2532 *out++ = d;
2533 } else if (d < 0xC0) {
2534 /* trailing byte in leading position */
2535 *outlen = out - outstart;
2536 *inlen = in - instart - 1;
2537 return(-2);
2538 } else if (d < 0xE0) {
2539 unsigned char c;
2540 if (!(in < inend)) {
2541 /* trailing byte not in input buffer */
2542 *outlen = out - outstart;
2543 *inlen = in - instart - 1;
2544 return(-2);
2545 }
2546 c = *in++;
William M. Brack16db7b62003-08-07 13:12:49 +00002547 if ((c & 0xC0) != 0xC0) {
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002548 /* not a trailing byte */
2549 *outlen = out - outstart;
2550 *inlen = in - instart - 2;
2551 return(-2);
2552 }
2553 c = c & 0x3F;
2554 d = d & 0x1F;
2555 d = xlattable [48 + c + xlattable [d] * 64];
2556 if (d == 0) {
2557 /* not in character set */
2558 *outlen = out - outstart;
2559 *inlen = in - instart - 2;
2560 return(-2);
2561 }
2562 *out++ = d;
2563 } else if (d < 0xF0) {
2564 unsigned char c1;
2565 unsigned char c2;
2566 if (!(in < inend - 1)) {
2567 /* trailing bytes not in input buffer */
2568 *outlen = out - outstart;
2569 *inlen = in - instart - 1;
2570 return(-2);
2571 }
2572 c1 = *in++;
William M. Brack16db7b62003-08-07 13:12:49 +00002573 if ((c1 & 0xC0) != 0xC0) {
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002574 /* not a trailing byte (c1) */
2575 *outlen = out - outstart;
2576 *inlen = in - instart - 2;
2577 return(-2);
2578 }
2579 c2 = *in++;
William M. Brack16db7b62003-08-07 13:12:49 +00002580 if ((c2 & 0xC0) != 0xC0) {
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002581 /* not a trailing byte (c2) */
2582 *outlen = out - outstart;
2583 *inlen = in - instart - 2;
2584 return(-2);
2585 }
2586 c1 = c1 & 0x3F;
2587 c2 = c2 & 0x3F;
2588 d = d & 0x0F;
2589 d = xlattable [48 + c2 + xlattable [48 + c1 + xlattable [32 + d] * 64] * 64];
2590 if (d == 0) {
2591 /* not in character set */
2592 *outlen = out - outstart;
2593 *inlen = in - instart - 3;
2594 return(-2);
2595 }
2596 *out++ = d;
2597 } else {
2598 /* cannot transcode >= U+010000 */
2599 *outlen = out - outstart;
2600 *inlen = in - instart - 1;
2601 return(-2);
2602 }
2603 }
2604 *outlen = out - outstart;
2605 *inlen = in - instart;
2606 return(0);
2607}
2608
2609/**
2610 * ISO8859xToUTF8
2611 * @out: a pointer to an array of bytes to store the result
2612 * @outlen: the length of @out
2613 * @in: a pointer to an array of ISO Latin 1 chars
2614 * @inlen: the length of @in
2615 *
2616 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2617 * block of chars out.
2618 * Returns 0 if success, or -1 otherwise
2619 * The value of @inlen after return is the number of octets consumed
2620 * The value of @outlen after return is the number of ocetes produced.
2621 */
2622static int
2623ISO8859xToUTF8(unsigned char* out, int *outlen,
2624 const unsigned char* in, int *inlen,
2625 unsigned short const *unicodetable) {
2626 unsigned char* outstart = out;
2627 unsigned char* outend = out + *outlen;
2628 const unsigned char* instart = in;
2629 const unsigned char* inend = in + *inlen;
2630 const unsigned char* instop = inend;
2631 unsigned int c = *in;
2632
2633 while (in < inend && out < outend - 1) {
2634 if (c >= 0x80) {
2635 c = unicodetable [c - 0x80];
2636 if (c == 0) {
2637 /* undefined code point */
2638 *outlen = out - outstart;
2639 *inlen = in - instart;
2640 return (-1);
2641 }
2642 if (c < 0x800) {
2643 *out++ = ((c >> 6) & 0x1F) | 0xC0;
2644 *out++ = (c & 0x3F) | 0x80;
2645 } else {
2646 *out++ = ((c >> 12) & 0x0F) | 0xE0;
2647 *out++ = ((c >> 6) & 0x3F) | 0x80;
2648 *out++ = (c & 0x3F) | 0x80;
2649 }
2650 ++in;
2651 c = *in;
2652 }
2653 if (instop - in > outend - out) instop = in + (outend - out);
2654 while (c < 0x80 && in < instop) {
2655 *out++ = c;
2656 ++in;
2657 c = *in;
2658 }
2659 }
2660 if (in < inend && out < outend && c < 0x80) {
2661 *out++ = c;
2662 ++in;
2663 }
2664 *outlen = out - outstart;
2665 *inlen = in - instart;
2666 return (0);
2667}
2668
2669
/************************************************************************
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
 ************************************************************************/
2673
/*
 * ISO-8859-2 to Unicode: entry (byte - 0x80) holds the code point of the
 * byte, in the form expected by ISO8859xToUTF8() (0 would mark an
 * undefined byte).
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2692
/*
 * Unicode to ISO-8859-2, in the layout walked by UTF8ToISO8859x(): 48 index
 * bytes (32 for the 2-byte UTF-8 leads, 16 for the 3-byte leads) followed
 * by six 64-byte blocks; block 0 is all zeroes and 0 means "not in the
 * character set". The string fills the array exactly, so no terminating
 * 0 is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2722
/*
 * ISO-8859-3 to Unicode: entry (byte - 0x80) holds the code point of the
 * byte, in the form expected by ISO8859xToUTF8(); the 0x0000 entries mark
 * bytes that have no mapping.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2741
/*
 * Unicode to ISO-8859-3, in the layout walked by UTF8ToISO8859x(): 48 index
 * bytes (32 for the 2-byte UTF-8 leads, 16 for the 3-byte leads) followed
 * by seven 64-byte blocks; block 0 is all zeroes and 0 means "not in the
 * character set". The string fills the array exactly, so no terminating
 * 0 is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2775
/*
 * xmlunicodetable_ISO8859_4:
 *
 * 128 16-bit values for the ISO 8859-4 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the remaining 96 hold the values listed.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of an ISO 8859-4 byte in 0x80..0xff -- the lookup code is not
 * in this part of the file, confirm there.
 */
2776static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2777 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2778 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2779 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2780 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2781 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2782 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2783 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2784 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2785 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2786 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2787 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2788 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2789 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2790 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2791 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2792 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2793};
2794
/*
 * xmltranscodetable_ISO8859_4:
 *
 * Byte table of 48 + 6 * 64 = 432 entries for the ISO 8859-4 character set.
 * The adjacent string literals supply exactly 432 bytes (27 rows of 16), so
 * the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by six
 * 64-byte pages, presumably used to map Unicode back to ISO 8859-4 bytes;
 * the routine that reads it is not in this part of the file -- confirm
 * there before changing any byte.
 */
2795static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2796 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2797 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2804 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2805 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2806 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2807 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2808 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2809 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2810 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2811 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2812 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2813 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2814 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2815 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2817 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2818 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2819 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2820 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2821 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2822 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2823};
2824
/*
 * xmlunicodetable_ISO8859_5:
 *
 * 128 16-bit values for the ISO 8859-5 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; most of the rest fall in 0x0401..0x045f,
 * the exceptions being 0x00a0, 0x00ad, 0x2116 and 0x00a7.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of an ISO 8859-5 byte in 0x80..0xff -- the lookup code is not
 * in this part of the file, confirm there.
 */
2825static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2826 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2827 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2828 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2829 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2830 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2831 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2832 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2833 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2834 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2835 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2836 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2837 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2838 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2839 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2840 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2841 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2842};
2843
/*
 * xmltranscodetable_ISO8859_5:
 *
 * Byte table of 48 + 6 * 64 = 432 entries for the ISO 8859-5 character set.
 * The adjacent string literals supply exactly 432 bytes (27 rows of 16), so
 * the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by six
 * 64-byte pages, presumably used to map Unicode back to ISO 8859-5 bytes;
 * the routine that reads it is not in this part of the file -- confirm
 * there before changing any byte.
 */
2844static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2845 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2846 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2847 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2853 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2854 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2857 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2858 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2859 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2860 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2861 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2862 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2866 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2867 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2868 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2869 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2870 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2871 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2872};
2873
/*
 * xmlunicodetable_ISO8859_6:
 *
 * 128 16-bit values for the ISO 8859-6 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the populated entries after that are
 * 0x00a0, 0x00a4, 0x00ad and values in 0x060c..0x0652. Many entries are
 * 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * byte values that have no character assigned -- the lookup code is not in
 * this part of the file, confirm how it treats a zero entry.
 */
2874static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2875 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2876 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2877 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2878 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2879 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2880 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2881 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2882 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2883 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2884 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2885 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2886 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2887 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2888 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2889 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2890 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2891};
2892
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte table of 48 + 5 * 64 = 368 entries for the ISO 8859-6 character set.
 * The adjacent string literals supply exactly 368 bytes (23 rows of 16), so
 * the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by five
 * 64-byte pages, presumably used to map Unicode back to ISO 8859-6 bytes;
 * the routine that reads it is not in this part of the file -- confirm
 * there before changing any byte.
 */
2893static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2894 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2895 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2900 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2901 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2902 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2903 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2910 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2911 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2912 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2913 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2914 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917};
2918
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 16-bit values for the ISO 8859-7 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the rest are a mix of 0x00xx values,
 * 0x2015/0x2018/0x2019 and values in 0x0384..0x03ce. A few entries are
 * 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * byte values that have no character assigned -- the lookup code is not in
 * this part of the file, confirm how it treats a zero entry.
 */
2919static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2920 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2921 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2922 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2923 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2924 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2925 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2926 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2927 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2928 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2929 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2930 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2931 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2932 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2933 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2934 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2935 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2936};
2937
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table of 48 + 7 * 64 = 496 entries for the ISO 8859-7 character set.
 * The adjacent string literals supply exactly 496 bytes (31 rows of 16), so
 * the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * seven 64-byte pages, presumably used to map Unicode back to ISO 8859-7
 * bytes; the routine that reads it is not in this part of the file --
 * confirm there before changing any byte.
 */
2938static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2939 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2945 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2946 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2947 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2948 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2949 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2950 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2963 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2964 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2965 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2966 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2967 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2970};
2971
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 16-bit values for the ISO 8859-8 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the rest are 0x00xx values, 0x2017,
 * 0x05d0..0x05ea, 0x200e and 0x200f. A large run of entries is 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * byte values that have no character assigned -- the lookup code is not in
 * this part of the file, confirm how it treats a zero entry.
 */
2972static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2973 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2974 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2975 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2976 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2977 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2978 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2979 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2980 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2981 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2982 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2983 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2984 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2985 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2986 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2987 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2988 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2989};
2990
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table of 48 + 7 * 64 = 496 entries for the ISO 8859-8 character set.
 * The adjacent string literals supply exactly 496 bytes (31 rows of 16), so
 * the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * seven 64-byte pages, presumably used to map Unicode back to ISO 8859-8
 * bytes; the routine that reads it is not in this part of the file --
 * confirm there before changing any byte.
 */
2991static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2992 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3000 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3001 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3002 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3003 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3004 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3005 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3016 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3017 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3018 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3021 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3022 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023};
3024
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 16-bit values for the ISO 8859-9 character set. Every entry equals
 * 0x0080 plus its index except six: 0x011e, 0x0130, 0x015e, 0x011f, 0x0131
 * and 0x015f.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of an ISO 8859-9 byte in 0x80..0xff -- the lookup code is not
 * in this part of the file, confirm there.
 */
3025static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3026 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3027 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3028 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3029 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3030 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3031 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3032 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3033 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3034 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3035 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3036 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3037 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3038 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3039 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3040 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3041 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3042};
3043
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table of 48 + 5 * 64 = 368 entries for the ISO 8859-9 character set.
 * The adjacent string literals supply exactly 368 bytes (23 rows of 16), so
 * the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by five
 * 64-byte pages, presumably used to map Unicode back to ISO 8859-9 bytes;
 * the routine that reads it is not in this part of the file -- confirm
 * there before changing any byte.
 */
3044static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3045 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3051 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3053 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3054 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3055 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3056 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3057 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3058 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3059 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068};
3069
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values for the ISO 8859-10 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the remaining 96 hold the values listed
 * (0x00xx and 0x01xx values plus a single 0x2015).
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of an ISO 8859-10 byte in 0x80..0xff -- the lookup code is not
 * in this part of the file, confirm there.
 */
3070static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3071 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3072 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3073 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3074 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3075 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3076 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3077 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3078 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3079 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3080 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3081 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3082 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3083 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3084 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3085 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3086 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3087};
3088
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table of 48 + 7 * 64 = 496 entries for the ISO 8859-10 character
 * set. The adjacent string literals supply exactly 496 bytes (31 rows of
 * 16), so the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * seven 64-byte pages, presumably used to map Unicode back to ISO 8859-10
 * bytes; the routine that reads it is not in this part of the file --
 * confirm there before changing any byte.
 */
3089static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3090 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3098 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3099 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3100 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3102 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3103 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3104 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3105 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3106 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3108 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3109 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3118 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3119 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3120 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3121};
3122
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values for the ISO 8859-11 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged, followed by 0x00a0 and values in
 * 0x0e01..0x0e5b. Eight entries (two runs of four) are 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * byte values that have no character assigned -- the lookup code is not in
 * this part of the file, confirm how it treats a zero entry.
 */
3123static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3124 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3125 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3126 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3127 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3128 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3129 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3130 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3131 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3132 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3133 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3134 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3135 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3136 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3137 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3138 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3139 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3140};
3141
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table of 48 + 6 * 64 = 432 entries for the ISO 8859-11 character
 * set. The adjacent string literals supply exactly 432 bytes (27 rows of
 * 16), so the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by six
 * 64-byte pages, presumably used to map Unicode back to ISO 8859-11 bytes;
 * the routine that reads it is not in this part of the file -- confirm
 * there before changing any byte.
 */
3142static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3143 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3151 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3152 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3153 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3158 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3159 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3160 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3161 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3162 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3167 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170};
3171
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values for the ISO 8859-13 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the remaining 96 hold the values listed
 * (0x00xx and 0x01xx values plus 0x2019, 0x201c, 0x201d and 0x201e).
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of an ISO 8859-13 byte in 0x80..0xff -- the lookup code is not
 * in this part of the file, confirm there.
 */
3172static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3173 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3174 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3175 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3176 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3177 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3178 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3179 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3180 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3181 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3182 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3183 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3184 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3185 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3186 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3187 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3188 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3189};
3190
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table of 48 + 7 * 64 = 496 entries for the ISO 8859-13 character
 * set. The adjacent string literals supply exactly 496 bytes (31 rows of
 * 16), so the array is completely filled and no terminating NUL is stored.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * seven 64-byte pages, presumably used to map Unicode back to ISO 8859-13
 * bytes; the routine that reads it is not in this part of the file --
 * confirm there before changing any byte.
 */
3191static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3192 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3200 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3201 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3202 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3203 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3213 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3214 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3215 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3216 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3217 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3218 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3219 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3220 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3221 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3222 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3223};
3224
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values for the ISO 8859-14 character set. The first 32 entries
 * hold 0x0080..0x009f unchanged; the remaining 96 hold the values listed
 * (0x00xx and 0x01xx values plus a number in the 0x1exx range).
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of an ISO 8859-14 byte in 0x80..0xff -- the lookup code is not
 * in this part of the file, confirm there.
 */
3225static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3226 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3227 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3228 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3229 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3230 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3231 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3232 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3233 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3234 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3235 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3236 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3237 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3238 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3239 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3240 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3241 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3242};
3243
/*
 * UTF-8 -> ISO-8859-14 lookup data, handed to UTF8ToISO8859x by
 * UTF8ToISO8859_14.
 *
 * Size is 48 + 10 * 64 bytes. The markers below split the data along
 * that arithmetic: the first 48 bytes, then ten 64-byte blocks.
 * NOTE(review): the first 48 bytes look like block numbers and the
 * 64-byte blocks look like per-character output bytes (0x00 presumably
 * meaning "no mapping") -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_14[48 + 10 * 64] = {
    /* bytes 0..47 */
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 2 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    /* 64-byte block 3 */
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 5 */
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 6 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 7 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 8 */
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 9 */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3289
/*
 * 128-entry table of 16-bit values that ISO8859_15ToUTF8 hands to
 * ISO8859xToUTF8.
 * NOTE(review): entry i presumably holds the Unicode code point for
 * input byte 0x80 + i -- confirm against ISO8859xToUTF8.
 * Most entries equal 0x80 + index; the exceptions are indexes 0x24,
 * 0x26, 0x28, 0x34, 0x38 and 0x3c-0x3e.
 */
static const unsigned short xmlunicodetable_ISO8859_15[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x00-0x07 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x08-0x0f */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x10-0x17 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x18-0x1f */
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, /* 0x20-0x27 */
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, /* 0x28-0x2f */
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, /* 0x30-0x37 */
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, /* 0x38-0x3f */
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, /* 0x40-0x47 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0x48-0x4f */
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, /* 0x50-0x57 */
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, /* 0x58-0x5f */
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, /* 0x60-0x67 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0x68-0x6f */
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, /* 0x70-0x77 */
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, /* 0x78-0x7f */
};
3308
/*
 * UTF-8 -> ISO-8859-15 lookup data, handed to UTF8ToISO8859x by
 * UTF8ToISO8859_15.
 *
 * Size is 48 + 6 * 64 bytes. The markers below split the data along
 * that arithmetic: the first 48 bytes, then six 64-byte blocks.
 * NOTE(review): the first 48 bytes look like block numbers and the
 * 64-byte blocks look like per-character output bytes (0x00 presumably
 * meaning "no mapping") -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_15[48 + 6 * 64] = {
    /* bytes 0..47 */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* 64-byte block 2 */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* 64-byte block 5 */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3338
/*
 * 128-entry table of 16-bit values that ISO8859_16ToUTF8 hands to
 * ISO8859xToUTF8.
 * NOTE(review): entry i presumably holds the Unicode code point for
 * input byte 0x80 + i -- confirm against ISO8859xToUTF8.
 * Indexes 0x00-0x20 equal 0x80 + index; many later entries do not.
 */
static const unsigned short xmlunicodetable_ISO8859_16[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x00-0x07 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x08-0x0f */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x10-0x17 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x18-0x1f */
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, /* 0x20-0x27 */
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, /* 0x28-0x2f */
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, /* 0x30-0x37 */
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, /* 0x38-0x3f */
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, /* 0x40-0x47 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0x48-0x4f */
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, /* 0x50-0x57 */
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, /* 0x58-0x5f */
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, /* 0x60-0x67 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0x68-0x6f */
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, /* 0x70-0x77 */
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, /* 0x78-0x7f */
};
3357
/*
 * UTF-8 -> ISO-8859-16 lookup data, handed to UTF8ToISO8859x by
 * UTF8ToISO8859_16.
 *
 * Size is 48 + 9 * 64 bytes. The markers below split the data along
 * that arithmetic: the first 48 bytes, then nine 64-byte blocks.
 * NOTE(review): the first 48 bytes look like block numbers and the
 * 64-byte blocks look like per-character output bytes (0x00 presumably
 * meaning "no mapping") -- confirm against UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_16[48 + 9 * 64] = {
    /* bytes 0..47 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* 64-byte block 2 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 3 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* 64-byte block 4 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 6 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 7 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* 64-byte block 8 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3399
3400
3401/*
3402 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3403 */
3404
3405static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3406 const unsigned char* in, int *inlen) {
3407 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3408}
3409static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3410 const unsigned char* in, int *inlen) {
3411 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3412}
3413
3414static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3415 const unsigned char* in, int *inlen) {
3416 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3417}
3418static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3419 const unsigned char* in, int *inlen) {
3420 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3421}
3422
3423static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3424 const unsigned char* in, int *inlen) {
3425 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3426}
3427static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3428 const unsigned char* in, int *inlen) {
3429 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3430}
3431
3432static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3433 const unsigned char* in, int *inlen) {
3434 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3435}
3436static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3437 const unsigned char* in, int *inlen) {
3438 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3439}
3440
3441static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3442 const unsigned char* in, int *inlen) {
3443 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3444}
3445static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3446 const unsigned char* in, int *inlen) {
3447 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3448}
3449
3450static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3451 const unsigned char* in, int *inlen) {
3452 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3453}
3454static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3455 const unsigned char* in, int *inlen) {
3456 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3457}
3458
3459static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3460 const unsigned char* in, int *inlen) {
3461 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3462}
3463static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3464 const unsigned char* in, int *inlen) {
3465 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3466}
3467
3468static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3469 const unsigned char* in, int *inlen) {
3470 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3471}
3472static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3473 const unsigned char* in, int *inlen) {
3474 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3475}
3476
3477static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3478 const unsigned char* in, int *inlen) {
3479 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3480}
3481static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3482 const unsigned char* in, int *inlen) {
3483 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3484}
3485
3486static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3487 const unsigned char* in, int *inlen) {
3488 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3489}
3490static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3491 const unsigned char* in, int *inlen) {
3492 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3493}
3494
3495static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3496 const unsigned char* in, int *inlen) {
3497 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3498}
3499static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3500 const unsigned char* in, int *inlen) {
3501 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3502}
3503
3504static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3505 const unsigned char* in, int *inlen) {
3506 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3507}
3508static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3509 const unsigned char* in, int *inlen) {
3510 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3511}
3512
3513static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3514 const unsigned char* in, int *inlen) {
3515 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3516}
3517static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3518 const unsigned char* in, int *inlen) {
3519 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3520}
3521
3522static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3523 const unsigned char* in, int *inlen) {
3524 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3525}
3526static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3527 const unsigned char* in, int *inlen) {
3528 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3529}
3530
3531static void
3532xmlRegisterCharEncodingHandlersISO8859x (void) {
3533 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3534 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3535 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3536 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3537 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3538 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3539 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3540 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3541 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3542 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3543 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3544 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3545 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3546 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3547}
3548
3549#endif
3550#endif
3551
3552