blob: 90fd38c10d5e2236615650c0ec537305d6b9fd19 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * UTF8 string routines from:
 * "William M. Brack" <wbrack@mmm.com.hk>
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
25
Daniel Veillard34ce8be2002-03-18 19:37:11 +000026#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000027#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000028
Owen Taylor3473f882001-02-23 17:55:21 +000029#include <string.h>
30
31#ifdef HAVE_CTYPE_H
32#include <ctype.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
Owen Taylor3473f882001-02-23 17:55:21 +000037#ifdef LIBXML_ICONV_ENABLED
38#ifdef HAVE_ERRNO_H
39#include <errno.h>
40#endif
41#endif
42#include <libxml/encoding.h>
43#include <libxml/xmlmemory.h>
44#ifdef LIBXML_HTML_ENABLED
45#include <libxml/HTMLparser.h>
46#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000047#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000048#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * Cached pointers to the UTF-16 little-endian and big-endian handlers.
 * They start out NULL; the code that assigns them is outside this part
 * of the file.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/*
 * One entry of the encoding alias table: a pair of C strings.
 * NOTE(review): presumably @alias is the alternate spelling and @name the
 * encoding name it maps to -- confirm against the alias registration code.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/*
 * The alias table itself plus two counters, all initially empty/zero.
 * NOTE(review): Nb/Max look like "entries in use" / "entries allocated";
 * the code maintaining them is not visible here -- confirm.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#ifdef LIBXML_ICONV_ENABLED
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
/* Forward declaration, only needed when iconv support is compiled out. */
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Consulted by the UTF-16 converters below: when non-zero they load/store
 * little-endian 16-bit units directly and assemble big-endian units byte by
 * byte; when zero it is the other way round.  Defaults to 1.
 * NOTE(review): presumably set from a run-time endianness probe during
 * initialisation -- that code is not in this part of the file.
 */
static int xmlLittleEndian = 1;
75
/************************************************************************
 *									*
 *		Generic UTF8 handling routines				*
 *									*
 * From rfc2044: encoding of the Unicode values on UTF-8:		*
 *									*
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)		*
 * 0000 0000-0000 007F   0xxxxxxx					*
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx				*
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx			*
 * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx		*
 *									*
 * The original note here read "I hope we won't use values > 0xFFFF	*
 * anytime soon !"; several routines below nevertheless handle the	*
 * 4-byte form listed in the last row.					*
 *									*
 ************************************************************************/
Owen Taylor3473f882001-02-23 17:55:21 +000090
91/**
William M. Brack4a557d92003-07-29 04:28:04 +000092 * xmlUTF8Size:
93 * @utf: pointer to the UTF8 character
94 *
William M. Brack7a821652003-08-15 07:27:40 +000095 * calulates the internal size of a UTF8 character
96 *
William M. Brack4a557d92003-07-29 04:28:04 +000097 * returns the numbers of bytes in the character, -1 on format error
98 */
99int
100xmlUTF8Size(const xmlChar *utf) {
101 xmlChar mask;
102 int len;
103
104 if (utf == NULL)
105 return -1;
106 if (*utf < 0x80)
107 return 1;
108 /* check valid UTF8 character */
109 if (!(*utf & 0x40))
110 return -1;
111 /* determine number of bytes in char */
112 len = 2;
113 for (mask=0x20; mask != 0; mask>>=1) {
114 if (!(*utf & mask))
115 return len;
116 len++;
117 }
118 return -1;
119}
120
121/**
William M. Brack7a821652003-08-15 07:27:40 +0000122 * xmlUTF8Charcmp:
William M. Brack4a557d92003-07-29 04:28:04 +0000123 * @utf1: pointer to first UTF8 char
124 * @utf2: pointer to second UTF8 char
125 *
William M. Brack7a821652003-08-15 07:27:40 +0000126 * compares the two UCS4 values
127 *
128 * returns result of the compare as with xmlStrncmp
William M. Brack4a557d92003-07-29 04:28:04 +0000129 */
130int
131xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
132
133 if (utf1 == NULL ) {
134 if (utf2 == NULL)
135 return 0;
136 return -1;
137 }
Daniel Veillard9ff7de12003-07-29 13:30:42 +0000138 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
William M. Brack4a557d92003-07-29 04:28:04 +0000139}
140
141/**
Daniel Veillarde043ee12001-04-16 14:08:07 +0000142 * xmlUTF8Strlen:
143 * @utf: a sequence of UTF-8 encoded bytes
144 *
Daniel Veillard60087f32001-10-10 09:45:09 +0000145 * compute the length of an UTF8 string, it doesn't do a full UTF8
Daniel Veillarde043ee12001-04-16 14:08:07 +0000146 * checking of the content of the string.
147 *
148 * Returns the number of characters in the string or -1 in case of error
149 */
150int
Daniel Veillard97ac1312001-05-30 19:14:17 +0000151xmlUTF8Strlen(const xmlChar *utf) {
Daniel Veillarde043ee12001-04-16 14:08:07 +0000152 int ret = 0;
153
154 if (utf == NULL)
155 return(-1);
156
157 while (*utf != 0) {
158 if (utf[0] & 0x80) {
159 if ((utf[1] & 0xc0) != 0x80)
160 return(-1);
161 if ((utf[0] & 0xe0) == 0xe0) {
162 if ((utf[2] & 0xc0) != 0x80)
163 return(-1);
164 if ((utf[0] & 0xf0) == 0xf0) {
165 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
166 return(-1);
167 utf += 4;
168 } else {
169 utf += 3;
170 }
171 } else {
172 utf += 2;
173 }
174 } else {
175 utf++;
176 }
177 ret++;
178 }
179 return(ret);
180}
181
/**
 * xmlGetUTF8Char:
 * @utf: a sequence of UTF-8 encoded bytes
 * @len: a pointer to the number of bytes available at @utf; on return it
 *       holds the number of bytes used (0 on error)
 *
 * Read one UTF8 Char from @utf.  Sequences of 1 to 4 bytes are decoded.
 * Continuation bytes are verified, and a 10xxxxxx byte in leading position
 * is rejected.  Overlong forms and values above 0x10FFFF are not detected.
 *
 * Returns the char value or -1 in case of error (NULL argument, not enough
 * bytes, malformed sequence) and update @len with the number of bytes used.
 * A NULL @len is reported as an error without being written to.
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /* 10xxxxxx is a continuation byte, it cannot start a character */
        if ((c & 0xc0) != 0xc0)
            goto error;
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                /* 11111xxx would announce more than 4 bytes */
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    /* this label is also reached when len itself is NULL */
    if (len != NULL)
        *len = 0;
    return(-1);
}
248
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative utf-8 encoded string.
 *
 * Checks @utf for being valid utf-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer utf-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * A NULL @utf is reported as invalid, and so is a continuation byte
 * (10xxxxxx) found where a character should start.
 *
 * Return value: true (1) if @utf is valid, 0 otherwise.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *cur;
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * Each test below only reads the next byte after the previous one was
     * found to be a (non-zero) continuation byte, so the scan never runs
     * past the terminating 0.
     */
    for (cur = utf; (c = *cur) != 0;) {
        if ((c & 0x80) == 0) {
            /* 1-byte code */
            cur++;
            continue;
        }
        /* a continuation byte cannot start a sequence */
        if ((c & 0xc0) != 0xc0)
            return(0);
        if ((cur[1] & 0xc0) != 0x80)
            return(0);
        if ((c & 0xe0) != 0xe0) {
            /* 2-byte code */
            cur += 2;
            continue;
        }
        if ((cur[2] & 0xc0) != 0x80)
            return(0);
        if ((c & 0xf0) != 0xf0) {
            /* 3-byte code */
            cur += 3;
            continue;
        }
        /* 11111xxx would announce more than 4 bytes */
        if ((c & 0xf8) != 0xf0 || (cur[3] & 0xc0) != 0x80)
            return(0);
        /* 4-byte code */
        cur += 4;
    }
    return(1);
}
292
293/**
Daniel Veillard97ac1312001-05-30 19:14:17 +0000294 * xmlUTF8Strsize:
295 * @utf: a sequence of UTF-8 encoded bytes
296 * @len: the number of characters in the array
297 *
298 * storage size of an UTF8 string
299 *
300 * Returns the storage size of
301 * the first 'len' characters of ARRAY
302 *
303 */
304
305int
306xmlUTF8Strsize(const xmlChar *utf, int len) {
307 const xmlChar *ptr=utf;
308 xmlChar ch;
309
310 if (len <= 0)
311 return(0);
312
313 while ( len-- > 0) {
314 if ( !*ptr )
315 break;
316 if ( (ch = *ptr++) & 0x80)
317 while ( (ch<<=1) & 0x80 )
318 ptr++;
319 }
320 return (ptr - utf);
321}
322
323
324/**
325 * xmlUTF8Strndup:
326 * @utf: the input UTF8 *
327 * @len: the len of @utf (in chars)
328 *
329 * a strndup for array of UTF8's
330 *
331 * Returns a new UTF8 * or NULL
332 */
333xmlChar *
334xmlUTF8Strndup(const xmlChar *utf, int len) {
335 xmlChar *ret;
336 int i;
337
338 if ((utf == NULL) || (len < 0)) return(NULL);
339 i = xmlUTF8Strsize(utf, len);
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000340 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
Daniel Veillard97ac1312001-05-30 19:14:17 +0000341 if (ret == NULL) {
342 xmlGenericError(xmlGenericErrorContext,
343 "malloc of %ld byte failed\n",
344 (len + 1) * (long)sizeof(xmlChar));
345 return(NULL);
346 }
347 memcpy(ret, utf, i * sizeof(xmlChar));
348 ret[i] = 0;
349 return(ret);
350}
351
352/**
353 * xmlUTF8Strpos:
354 * @utf: the input UTF8 *
355 * @pos: the position of the desired UTF8 char (in chars)
356 *
357 * a function to provide the equivalent of fetching a
358 * character from a string array
359 *
360 * Returns a pointer to the UTF8 character or NULL
361 */
362xmlChar *
363xmlUTF8Strpos(const xmlChar *utf, int pos) {
364 xmlChar ch;
365
366 if (utf == NULL) return(NULL);
367 if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
368 return(NULL);
369 while (pos--) {
370 if ((ch=*utf++) == 0) return(NULL);
371 if ( ch & 0x80 ) {
372 /* if not simple ascii, verify proper format */
373 if ( (ch & 0xc0) != 0xc0 )
374 return(NULL);
375 /* then skip over remaining bytes for this char */
376 while ( (ch <<= 1) & 0x80 )
377 if ( (*utf++ & 0xc0) != 0x80 )
378 return(NULL);
379 }
380 }
381 return((xmlChar *)utf);
382}
383
384/**
385 * xmlUTF8Strloc:
386 * @utf: the input UTF8 *
387 * @utfchar: the UTF8 character to be found
388 *
389 * a function to provide relative location of a UTF8 char
390 *
391 * Returns the relative character position of the desired char
392 * or -1 if not found
393 */
394int
395xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
396 int i, size;
397 xmlChar ch;
398
399 if (utf==NULL || utfchar==NULL) return -1;
400 size = xmlUTF8Strsize(utfchar, 1);
401 for(i=0; (ch=*utf) != 0; i++) {
402 if (xmlStrncmp(utf, utfchar, size)==0)
403 return(i);
404 utf++;
405 if ( ch & 0x80 ) {
406 /* if not simple ascii, verify proper format */
407 if ( (ch & 0xc0) != 0xc0 )
408 return(-1);
409 /* then skip over remaining bytes for this char */
410 while ( (ch <<= 1) & 0x80 )
411 if ( (*utf++ & 0xc0) != 0x80 )
412 return(-1);
413 }
414 }
415
416 return(-1);
417}
418/**
419 * xmlUTF8Strsub:
420 * @utf: a sequence of UTF-8 encoded bytes
Daniel Veillard97ac1312001-05-30 19:14:17 +0000421 * @start: relative pos of first char
422 * @len: total number to copy
423 *
424 * Note: positions are given in units of UTF-8 chars
425 *
426 * Returns a pointer to a newly created string
427 * or NULL if any problem
428 */
429
430xmlChar *
431xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
432 int i;
433 xmlChar ch;
434
435 if (utf == NULL) return(NULL);
436 if (start < 0) return(NULL);
437 if (len < 0) return(NULL);
438
439 /*
440 * Skip over any leading chars
441 */
442 for (i = 0;i < start;i++) {
443 if ((ch=*utf++) == 0) return(NULL);
444 if ( ch & 0x80 ) {
445 /* if not simple ascii, verify proper format */
446 if ( (ch & 0xc0) != 0xc0 )
447 return(NULL);
448 /* then skip over remaining bytes for this char */
449 while ( (ch <<= 1) & 0x80 )
450 if ( (*utf++ & 0xc0) != 0x80 )
451 return(NULL);
452 }
453 }
454
455 return(xmlUTF8Strndup(utf, len));
456}
457
458/************************************************************************
459 * *
460 * Conversions To/From UTF8 encoding *
461 * *
462 ************************************************************************/
463
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  ASCII bytes are copied unchanged; conversion
 * stops at the first byte >= 0x80, at the end of the input, or as soon
 * as fewer than 6 bytes of output space remain.
 *
 * Returns 0 if success, or -1 if a non-ASCII byte was met
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *srcend = in + (*inlen);
    unsigned char *dst = out;

    /* the "+ 5" keeps a safety margin in the output buffer */
    while ((src < srcend) && ((dst - out) + 5 < *outlen)) {
        unsigned int c = *src;

        if (c >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = dst - out;
            *inlen = src - in;
            return(-1);
        }
        *dst++ = c;
        src++;
    }
    *outlen = dst - out;
    *inlen = src - in;
    return(0);
}
514
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.  A NULL @in is an initialization call and does
 * nothing.  Conversion stops early, still returning 0, when the input
 * ends in the middle of a character or when @out is full.
 *
 * Returns 0 if success, -2 if the transcoding fails: malformed UTF-8
 * (bad lead byte or bad continuation byte) or a character outside
 * the ASCII range.
 * The value of @inlen after return is the number of octets consumed,
 * counting complete characters only.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto fail;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto fail;  /* no chance for this in Ascii */

        /* incomplete character at the end of the block: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A malformed continuation byte is an error; the partially
             * assembled value must not be emitted.
             */
            if ((d & 0xC0) != 0x80)
                goto fail;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c >= 0x80)
            goto fail;                  /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(0);

fail:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
597
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied, bytes from 0x80 up
 * become a 2-byte sequence.  Conversion stops when the input is used up
 * or the next character does not fit in @out.
 *
 * Returns 0 if success, or -1 if one of the arguments is NULL
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instop;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);
    instop = inend;

    /*
     * Input bytes are only read after "in" was checked against the end
     * of the block, so nothing beyond @inlen bytes is ever touched.
     * The main loop needs room for a 2-byte sequence.
     */
    while ((in < inend) && (outend - out >= 2)) {
        if (*in >= 0x80) {
            *out++ = ((*in >> 6) & 0x1F) | 0xC0;
            *out++ = (*in & 0x3F) | 0x80;
            ++in;
        }
        /* bound the ASCII run by the output space that is left */
        if ((instop - in) > (outend - out))
            instop = in + (outend - out);
        while ((in < instop) && (*in < 0x80))
            *out++ = *in++;
    }
    /* exactly one byte of output left: it can still take an ASCII char */
    if ((in < inend) && (out < outend) && (*in < 0x80))
        *out++ = *in++;
    *outlen = out - outstart;
    *inlen = in - base;
    return(0);
}
647
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as both
 * buffers allow, without looking at character boundaries.
 *
 * Returns 0 on success, or -1 if an argument is NULL or a length is
 * negative.
 * The values of *outlen and *inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (out == NULL)
        return(-1);
    if (inb == NULL)
        return(-1);
    if ((outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* the smaller of the two lengths bounds the copy */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(0);
}
684
Daniel Veillarde72c7562002-05-31 09:47:30 +0000685
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.  A NULL @in is an initialization call and does
 * nothing.  Conversion stops early, still returning 0, when the input
 * ends in the middle of a character or when @out is full.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 * a character above 0xFF)
 * The value of @inlen after return is the number of octets consumed,
 * counting complete characters only.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src, *srcend, *done;
    unsigned char *dst;
    const unsigned char *dstend;
    unsigned int value, byte;
    int extra;
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    done = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (src < srcend) {
        byte = *src++;
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if ((byte < 0xC0) || (byte >= 0xF8)) {
            /* trailing byte in leading position, or a lead byte that
             * has no chance in IsoLat1 */
            ret = -2;
            break;
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else {
            value = byte & 0x07;
            extra = 3;
        }

        /* incomplete character at the end of the block: wait for more */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80) {
                ret = -2;
                break;
            }
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }
        if (ret != 0)
            break;

        /* assertion: value is a single UTF-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            ret = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = value;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(ret);
}
773
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes; an odd value is rounded down
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is read one byte at a time, so
 * @inb needs no particular alignment and the result does not depend on
 * the byte order of the machine.
 *
 * Conversion stops, still returning 0, at the end of the input, when a
 * surrogate pair is split by the end of the input, or as soon as fewer
 * than 6 bytes of output space remain.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 * not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed,
 * the value of *outlen the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /*
     * The "+ 5" margin guarantees that the at most 4 bytes written per
     * iteration always fit, so no further output check is needed below.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend)             /* pair split by end of input */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        /* lead byte, then "bits" says how many 6-bit groups follow */
        if      (c <    0x80) {  *out++=  c;                         bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
861
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  The output is written one byte at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the machine.
 *
 * A NULL @in is an initialization call: if @outb has room the Byte Order
 * Mark FF FE is written and 2 is returned, otherwise 0.
 *
 * Otherwise conversion stops, still returning 0, at the end of the
 * input, when the input ends in the middle of a character, or when the
 * next character does not fit in @outb.
 *
 * Returns 0 on success (2 for the initialization call that wrote the
 * BOM), or -2 if the transcoding failed: malformed UTF-8 (bad lead or
 * continuation byte) or a value of 0x110000 and above.
 * The value of *inlen after return is the number of octets consumed,
 * the value of *outlen the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* incomplete character at the end of the block: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a malformed continuation byte must not yield a code unit */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c & 0xFF;
            *out++ = (c >> 8) & 0xFF;
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = hi & 0xFF;
            *out++ = (hi >> 8) & 0xFF;
            *out++ = lo & 0xFF;
            *out++ = (lo >> 8) & 0xFF;
        }
        else
            goto invalid;               /* not representable in UTF-16 */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(0);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
981
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes; an odd value is rounded down
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is read one byte at a time, so
 * @inb needs no particular alignment and the result does not depend on
 * the byte order of the machine.
 *
 * Conversion stops, still returning 0, at the end of the input, when a
 * surrogate pair is split by the end of the input, or when the next
 * character does not fit completely in @out.  A character is either
 * written in full or not at all.
 *
 * Returns 0 on success, or -2 if the transcoding fails (a high surrogate
 * not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed,
 * the value of *outlen the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits, needed;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;
    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /*
             * Pair split by the end of the input: stop before it so the
             * caller can supply the rest later, as UTF16LEToUTF8 does.
             */
            if (in >= inend)
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if      (c <    0x80)  needed = 1;
        else if (c <   0x800)  needed = 2;
        else if (c < 0x10000)  needed = 3;
        else                   needed = 4;

        /* never emit a truncated multi-byte sequence */
        if (outend - out < needed)
            break;

        /* lead byte, then "bits" says how many 6-bit groups follow */
        if      (needed == 1) {  *out++=  c;                         bits= -6; }
        else if (needed == 2) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (needed == 3) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
1073
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb, in bytes
 * @in:  a pointer to an array of UTF-8 chars, or NULL to request the
 *       initial Byte Order Mark
 * @inlen:  the length of @in, in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.
 *
 * When @in is NULL the function only emits the big-endian BOM (FE FF)
 * if @outb has room for it.
 *
 * Conversion stops without error when the input ends in the middle of a
 * sequence, when @outb is full, or when a decoded value is above
 * 0x10FFFF; only complete characters are counted as consumed.
 *
 * Returns 0 on success (2 when the BOM was written), or -2 if the input
 *     is not valid UTF-8 (stray or missing continuation byte, or a lead
 *     byte above 0xF7).
 * In every case *@outlen is set to the number of bytes written to @outb
 * and *@inlen to the number of bytes of @in that were consumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFE;
            outb[1] = 0xFF;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FEFF Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    /* Only form the end pointers once @in is known to be non-NULL. */
    inend = in + *inlen;
    /* Round the usable output size down to a whole number of code units. */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* Incomplete sequence at the end of the block: wait for more data. */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* A non-continuation byte here means the sequence is broken. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /*
         * c is now a single code point. Store it byte by byte, high byte
         * first, so the result does not depend on host endianness or on
         * the alignment of @outb.
         */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            unsigned int high, low;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high >> 8);
            out[1] = (unsigned char) (high & 0xFF);
            out[2] = (unsigned char) (low >> 8);
            out[3] = (unsigned char) (low & 0xFF);
            out += 4;
        } else {
            /* value outside the UTF-16 range: stop without consuming it */
            break;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(0);

invalid:
    /* Report what was produced/consumed before the offending sequence. */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
1190
Daniel Veillard97ac1312001-05-30 19:14:17 +00001191/************************************************************************
1192 * *
1193 * Generic encoding handling routines *
1194 * *
1195 ************************************************************************/
1196
Owen Taylor3473f882001-02-23 17:55:21 +00001197/**
1198 * xmlDetectCharEncoding:
1199 * @in: a pointer to the first bytes of the XML entity, must be at least
1200 * 4 bytes long.
1201 * @len: pointer to the length of the buffer
1202 *
1203 * Guess the encoding of the entity using the first bytes of the entity content
1204 * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
1205 *
1206 * Returns one of the XML_CHAR_ENCODING_... values.
1207 */
1208xmlCharEncoding
1209xmlDetectCharEncoding(const unsigned char* in, int len)
1210{
1211 if (len >= 4) {
1212 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1213 (in[2] == 0x00) && (in[3] == 0x3C))
1214 return(XML_CHAR_ENCODING_UCS4BE);
1215 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
1216 (in[2] == 0x00) && (in[3] == 0x00))
1217 return(XML_CHAR_ENCODING_UCS4LE);
1218 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1219 (in[2] == 0x3C) && (in[3] == 0x00))
1220 return(XML_CHAR_ENCODING_UCS4_2143);
1221 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
1222 (in[2] == 0x00) && (in[3] == 0x00))
1223 return(XML_CHAR_ENCODING_UCS4_3412);
1224 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
1225 (in[2] == 0xA7) && (in[3] == 0x94))
1226 return(XML_CHAR_ENCODING_EBCDIC);
1227 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
1228 (in[2] == 0x78) && (in[3] == 0x6D))
1229 return(XML_CHAR_ENCODING_UTF8);
1230 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001231 if (len >= 3) {
1232 /*
1233 * Errata on XML-1.0 June 20 2001
1234 * We now allow an UTF8 encoded BOM
1235 */
1236 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1237 (in[2] == 0xBF))
1238 return(XML_CHAR_ENCODING_UTF8);
1239 }
Owen Taylor3473f882001-02-23 17:55:21 +00001240 if (len >= 2) {
1241 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1242 return(XML_CHAR_ENCODING_UTF16BE);
1243 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1244 return(XML_CHAR_ENCODING_UTF16LE);
1245 }
1246 return(XML_CHAR_ENCODING_NONE);
1247}
1248
1249/**
1250 * xmlCleanupEncodingAliases:
1251 *
1252 * Unregisters all aliases
1253 */
1254void
1255xmlCleanupEncodingAliases(void) {
1256 int i;
1257
1258 if (xmlCharEncodingAliases == NULL)
1259 return;
1260
1261 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1262 if (xmlCharEncodingAliases[i].name != NULL)
1263 xmlFree((char *) xmlCharEncodingAliases[i].name);
1264 if (xmlCharEncodingAliases[i].alias != NULL)
1265 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1266 }
1267 xmlCharEncodingAliasesNb = 0;
1268 xmlCharEncodingAliasesMax = 0;
1269 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +00001270 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001271}
1272
1273/**
1274 * xmlGetEncodingAlias:
1275 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1276 *
1277 * Lookup an encoding name for the given alias.
1278 *
1279 * Returns NULL if not found the original name otherwise
1280 */
1281const char *
1282xmlGetEncodingAlias(const char *alias) {
1283 int i;
1284 char upper[100];
1285
1286 if (alias == NULL)
1287 return(NULL);
1288
1289 if (xmlCharEncodingAliases == NULL)
1290 return(NULL);
1291
1292 for (i = 0;i < 99;i++) {
1293 upper[i] = toupper(alias[i]);
1294 if (upper[i] == 0) break;
1295 }
1296 upper[i] = 0;
1297
1298 /*
1299 * Walk down the list looking for a definition of the alias
1300 */
1301 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1302 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1303 return(xmlCharEncodingAliases[i].name);
1304 }
1305 }
1306 return(NULL);
1307}
1308
1309/**
1310 * xmlAddEncodingAlias:
1311 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1312 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1313 *
1314 * Registers and alias @alias for an encoding named @name. Existing alias
1315 * will be overwritten.
1316 *
1317 * Returns 0 in case of success, -1 in case of error
1318 */
1319int
1320xmlAddEncodingAlias(const char *name, const char *alias) {
1321 int i;
1322 char upper[100];
1323
1324 if ((name == NULL) || (alias == NULL))
1325 return(-1);
1326
1327 for (i = 0;i < 99;i++) {
1328 upper[i] = toupper(alias[i]);
1329 if (upper[i] == 0) break;
1330 }
1331 upper[i] = 0;
1332
1333 if (xmlCharEncodingAliases == NULL) {
1334 xmlCharEncodingAliasesNb = 0;
1335 xmlCharEncodingAliasesMax = 20;
1336 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1337 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1338 if (xmlCharEncodingAliases == NULL)
1339 return(-1);
1340 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1341 xmlCharEncodingAliasesMax *= 2;
1342 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1343 xmlRealloc(xmlCharEncodingAliases,
1344 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1345 }
1346 /*
1347 * Walk down the list looking for a definition of the alias
1348 */
1349 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1350 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1351 /*
1352 * Replace the definition.
1353 */
1354 xmlFree((char *) xmlCharEncodingAliases[i].name);
1355 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1356 return(0);
1357 }
1358 }
1359 /*
1360 * Add the definition
1361 */
1362 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1363 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1364 xmlCharEncodingAliasesNb++;
1365 return(0);
1366}
1367
1368/**
1369 * xmlDelEncodingAlias:
1370 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1371 *
1372 * Unregisters an encoding alias @alias
1373 *
1374 * Returns 0 in case of success, -1 in case of error
1375 */
1376int
1377xmlDelEncodingAlias(const char *alias) {
1378 int i;
1379
1380 if (alias == NULL)
1381 return(-1);
1382
1383 if (xmlCharEncodingAliases == NULL)
1384 return(-1);
1385 /*
1386 * Walk down the list looking for a definition of the alias
1387 */
1388 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1389 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1390 xmlFree((char *) xmlCharEncodingAliases[i].name);
1391 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1392 xmlCharEncodingAliasesNb--;
1393 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1394 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1395 return(0);
1396 }
1397 }
1398 return(-1);
1399}
1400
1401/**
1402 * xmlParseCharEncoding:
1403 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1404 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001405 * Compare the string to the known encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001406 * that the comparison is case insensitive accordingly to the section
1407 * [XML] 4.3.3 Character Encoding in Entities.
1408 *
1409 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1410 * if not recognized.
1411 */
1412xmlCharEncoding
1413xmlParseCharEncoding(const char* name)
1414{
1415 const char *alias;
1416 char upper[500];
1417 int i;
1418
1419 if (name == NULL)
1420 return(XML_CHAR_ENCODING_NONE);
1421
1422 /*
1423 * Do the alias resolution
1424 */
1425 alias = xmlGetEncodingAlias(name);
1426 if (alias != NULL)
1427 name = alias;
1428
1429 for (i = 0;i < 499;i++) {
1430 upper[i] = toupper(name[i]);
1431 if (upper[i] == 0) break;
1432 }
1433 upper[i] = 0;
1434
1435 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1436 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1437 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1438
1439 /*
1440 * NOTE: if we were able to parse this, the endianness of UTF16 is
1441 * already found and in use
1442 */
1443 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1444 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1445
1446 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1447 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1448 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1449
1450 /*
1451 * NOTE: if we were able to parse this, the endianness of UCS4 is
1452 * already found and in use
1453 */
1454 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1455 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1456 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1457
1458
1459 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1460 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1461 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1462
1463 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1464 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1465 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1466
1467 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1468 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1469 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1470 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1471 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1472 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1473 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1474
1475 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1476 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1477 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1478
1479#ifdef DEBUG_ENCODING
1480 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1481#endif
1482 return(XML_CHAR_ENCODING_ERROR);
1483}
1484
1485/**
1486 * xmlGetCharEncodingName:
1487 * @enc: the encoding
1488 *
1489 * The "canonical" name for XML encoding.
1490 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1491 * Section 4.3.3 Character Encoding in Entities
1492 *
1493 * Returns the canonical name for the given encoding
1494 */
1495
1496const char*
1497xmlGetCharEncodingName(xmlCharEncoding enc) {
1498 switch (enc) {
1499 case XML_CHAR_ENCODING_ERROR:
1500 return(NULL);
1501 case XML_CHAR_ENCODING_NONE:
1502 return(NULL);
1503 case XML_CHAR_ENCODING_UTF8:
1504 return("UTF-8");
1505 case XML_CHAR_ENCODING_UTF16LE:
1506 return("UTF-16");
1507 case XML_CHAR_ENCODING_UTF16BE:
1508 return("UTF-16");
1509 case XML_CHAR_ENCODING_EBCDIC:
1510 return("EBCDIC");
1511 case XML_CHAR_ENCODING_UCS4LE:
1512 return("ISO-10646-UCS-4");
1513 case XML_CHAR_ENCODING_UCS4BE:
1514 return("ISO-10646-UCS-4");
1515 case XML_CHAR_ENCODING_UCS4_2143:
1516 return("ISO-10646-UCS-4");
1517 case XML_CHAR_ENCODING_UCS4_3412:
1518 return("ISO-10646-UCS-4");
1519 case XML_CHAR_ENCODING_UCS2:
1520 return("ISO-10646-UCS-2");
1521 case XML_CHAR_ENCODING_8859_1:
1522 return("ISO-8859-1");
1523 case XML_CHAR_ENCODING_8859_2:
1524 return("ISO-8859-2");
1525 case XML_CHAR_ENCODING_8859_3:
1526 return("ISO-8859-3");
1527 case XML_CHAR_ENCODING_8859_4:
1528 return("ISO-8859-4");
1529 case XML_CHAR_ENCODING_8859_5:
1530 return("ISO-8859-5");
1531 case XML_CHAR_ENCODING_8859_6:
1532 return("ISO-8859-6");
1533 case XML_CHAR_ENCODING_8859_7:
1534 return("ISO-8859-7");
1535 case XML_CHAR_ENCODING_8859_8:
1536 return("ISO-8859-8");
1537 case XML_CHAR_ENCODING_8859_9:
1538 return("ISO-8859-9");
1539 case XML_CHAR_ENCODING_2022_JP:
1540 return("ISO-2022-JP");
1541 case XML_CHAR_ENCODING_SHIFT_JIS:
1542 return("Shift-JIS");
1543 case XML_CHAR_ENCODING_EUC_JP:
1544 return("EUC-JP");
1545 case XML_CHAR_ENCODING_ASCII:
1546 return(NULL);
1547 }
1548 return(NULL);
1549}
1550
Daniel Veillard97ac1312001-05-30 19:14:17 +00001551/************************************************************************
1552 * *
1553 * Char encoding handlers *
1554 * *
1555 ************************************************************************/
1556
Owen Taylor3473f882001-02-23 17:55:21 +00001557
/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
/*
 * Table of registered handlers. It is allocated with
 * MAX_ENCODING_HANDLERS slots by xmlInitCharEncodingHandlers() and stays
 * NULL until then.
 */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of slots of the table currently in use. */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1569
1570/**
1571 * xmlNewCharEncodingHandler:
1572 * @name: the encoding name, in UTF-8 format (ASCII actually)
1573 * @input: the xmlCharEncodingInputFunc to read that encoding
1574 * @output: the xmlCharEncodingOutputFunc to write that encoding
1575 *
1576 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001577 *
Owen Taylor3473f882001-02-23 17:55:21 +00001578 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1579 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001580xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001581xmlNewCharEncodingHandler(const char *name,
1582 xmlCharEncodingInputFunc input,
1583 xmlCharEncodingOutputFunc output) {
1584 xmlCharEncodingHandlerPtr handler;
1585 const char *alias;
1586 char upper[500];
1587 int i;
1588 char *up = 0;
1589
1590 /*
1591 * Do the alias resolution
1592 */
1593 alias = xmlGetEncodingAlias(name);
1594 if (alias != NULL)
1595 name = alias;
1596
1597 /*
1598 * Keep only the uppercase version of the encoding.
1599 */
1600 if (name == NULL) {
1601 xmlGenericError(xmlGenericErrorContext,
1602 "xmlNewCharEncodingHandler : no name !\n");
1603 return(NULL);
1604 }
1605 for (i = 0;i < 499;i++) {
1606 upper[i] = toupper(name[i]);
1607 if (upper[i] == 0) break;
1608 }
1609 upper[i] = 0;
1610 up = xmlMemStrdup(upper);
1611 if (up == NULL) {
1612 xmlGenericError(xmlGenericErrorContext,
1613 "xmlNewCharEncodingHandler : out of memory !\n");
1614 return(NULL);
1615 }
1616
1617 /*
1618 * allocate and fill-up an handler block.
1619 */
1620 handler = (xmlCharEncodingHandlerPtr)
1621 xmlMalloc(sizeof(xmlCharEncodingHandler));
1622 if (handler == NULL) {
1623 xmlGenericError(xmlGenericErrorContext,
1624 "xmlNewCharEncodingHandler : out of memory !\n");
1625 return(NULL);
1626 }
1627 handler->input = input;
1628 handler->output = output;
1629 handler->name = up;
1630
1631#ifdef LIBXML_ICONV_ENABLED
1632 handler->iconv_in = NULL;
1633 handler->iconv_out = NULL;
1634#endif /* LIBXML_ICONV_ENABLED */
1635
1636 /*
1637 * registers and returns the handler.
1638 */
1639 xmlRegisterCharEncodingHandler(handler);
1640#ifdef DEBUG_ENCODING
1641 xmlGenericError(xmlGenericErrorContext,
1642 "Registered encoding handler for %s\n", name);
1643#endif
1644 return(handler);
1645}
1646
1647/**
1648 * xmlInitCharEncodingHandlers:
1649 *
1650 * Initialize the char encoding support, it registers the default
1651 * encoding supported.
1652 * NOTE: while public, this function usually doesn't need to be called
1653 * in normal processing.
1654 */
1655void
1656xmlInitCharEncodingHandlers(void) {
1657 unsigned short int tst = 0x1234;
1658 unsigned char *ptr = (unsigned char *) &tst;
1659
1660 if (handlers != NULL) return;
1661
1662 handlers = (xmlCharEncodingHandlerPtr *)
1663 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1664
1665 if (*ptr == 0x12) xmlLittleEndian = 0;
1666 else if (*ptr == 0x34) xmlLittleEndian = 1;
1667 else xmlGenericError(xmlGenericErrorContext,
1668 "Odd problem at endianness detection\n");
1669
1670 if (handlers == NULL) {
1671 xmlGenericError(xmlGenericErrorContext,
1672 "xmlInitCharEncodingHandlers : out of memory !\n");
1673 return;
1674 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001675 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Owen Taylor3473f882001-02-23 17:55:21 +00001676 xmlUTF16LEHandler =
1677 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1678 xmlUTF16BEHandler =
1679 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1680 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1681 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001682 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001683#ifdef LIBXML_HTML_ENABLED
1684 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1685#endif
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001686#ifndef LIBXML_ICONV_ENABLED
1687#ifdef LIBXML_ISO8859X_ENABLED
1688 xmlRegisterCharEncodingHandlersISO8859x ();
1689#endif
1690#endif
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692}
1693
1694/**
1695 * xmlCleanupCharEncodingHandlers:
1696 *
1697 * Cleanup the memory allocated for the char encoding support, it
1698 * unregisters all the encoding handlers and the aliases.
1699 */
1700void
1701xmlCleanupCharEncodingHandlers(void) {
1702 xmlCleanupEncodingAliases();
1703
1704 if (handlers == NULL) return;
1705
1706 for (;nbCharEncodingHandler > 0;) {
1707 nbCharEncodingHandler--;
1708 if (handlers[nbCharEncodingHandler] != NULL) {
1709 if (handlers[nbCharEncodingHandler]->name != NULL)
1710 xmlFree(handlers[nbCharEncodingHandler]->name);
1711 xmlFree(handlers[nbCharEncodingHandler]);
1712 }
1713 }
1714 xmlFree(handlers);
1715 handlers = NULL;
1716 nbCharEncodingHandler = 0;
1717 xmlDefaultCharEncodingHandler = NULL;
1718}
1719
1720/**
1721 * xmlRegisterCharEncodingHandler:
1722 * @handler: the xmlCharEncodingHandlerPtr handler block
1723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001724 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001725 */
1726void
1727xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1728 if (handlers == NULL) xmlInitCharEncodingHandlers();
1729 if (handler == NULL) {
1730 xmlGenericError(xmlGenericErrorContext,
1731 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1732 return;
1733 }
1734
1735 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1736 xmlGenericError(xmlGenericErrorContext,
1737 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1738 xmlGenericError(xmlGenericErrorContext,
1739 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1740 return;
1741 }
1742 handlers[nbCharEncodingHandler++] = handler;
1743}
1744
1745/**
1746 * xmlGetCharEncodingHandler:
1747 * @enc: an xmlCharEncoding value.
1748 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001749 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001750 *
1751 * Returns the handler or NULL if not found
1752 */
1753xmlCharEncodingHandlerPtr
1754xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1755 xmlCharEncodingHandlerPtr handler;
1756
1757 if (handlers == NULL) xmlInitCharEncodingHandlers();
1758 switch (enc) {
1759 case XML_CHAR_ENCODING_ERROR:
1760 return(NULL);
1761 case XML_CHAR_ENCODING_NONE:
1762 return(NULL);
1763 case XML_CHAR_ENCODING_UTF8:
1764 return(NULL);
1765 case XML_CHAR_ENCODING_UTF16LE:
1766 return(xmlUTF16LEHandler);
1767 case XML_CHAR_ENCODING_UTF16BE:
1768 return(xmlUTF16BEHandler);
1769 case XML_CHAR_ENCODING_EBCDIC:
1770 handler = xmlFindCharEncodingHandler("EBCDIC");
1771 if (handler != NULL) return(handler);
1772 handler = xmlFindCharEncodingHandler("ebcdic");
1773 if (handler != NULL) return(handler);
1774 break;
1775 case XML_CHAR_ENCODING_UCS4BE:
1776 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1777 if (handler != NULL) return(handler);
1778 handler = xmlFindCharEncodingHandler("UCS-4");
1779 if (handler != NULL) return(handler);
1780 handler = xmlFindCharEncodingHandler("UCS4");
1781 if (handler != NULL) return(handler);
1782 break;
1783 case XML_CHAR_ENCODING_UCS4LE:
1784 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1785 if (handler != NULL) return(handler);
1786 handler = xmlFindCharEncodingHandler("UCS-4");
1787 if (handler != NULL) return(handler);
1788 handler = xmlFindCharEncodingHandler("UCS4");
1789 if (handler != NULL) return(handler);
1790 break;
1791 case XML_CHAR_ENCODING_UCS4_2143:
1792 break;
1793 case XML_CHAR_ENCODING_UCS4_3412:
1794 break;
1795 case XML_CHAR_ENCODING_UCS2:
1796 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1797 if (handler != NULL) return(handler);
1798 handler = xmlFindCharEncodingHandler("UCS-2");
1799 if (handler != NULL) return(handler);
1800 handler = xmlFindCharEncodingHandler("UCS2");
1801 if (handler != NULL) return(handler);
1802 break;
1803
1804 /*
1805 * We used to keep ISO Latin encodings native in the
1806 * generated data. This led to so many problems that
1807 * this has been removed. One can still change this
1808 * back by registering no-ops encoders for those
1809 */
1810 case XML_CHAR_ENCODING_8859_1:
1811 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1812 if (handler != NULL) return(handler);
1813 break;
1814 case XML_CHAR_ENCODING_8859_2:
1815 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1816 if (handler != NULL) return(handler);
1817 break;
1818 case XML_CHAR_ENCODING_8859_3:
1819 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1820 if (handler != NULL) return(handler);
1821 break;
1822 case XML_CHAR_ENCODING_8859_4:
1823 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1824 if (handler != NULL) return(handler);
1825 break;
1826 case XML_CHAR_ENCODING_8859_5:
1827 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1828 if (handler != NULL) return(handler);
1829 break;
1830 case XML_CHAR_ENCODING_8859_6:
1831 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1832 if (handler != NULL) return(handler);
1833 break;
1834 case XML_CHAR_ENCODING_8859_7:
1835 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1836 if (handler != NULL) return(handler);
1837 break;
1838 case XML_CHAR_ENCODING_8859_8:
1839 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1840 if (handler != NULL) return(handler);
1841 break;
1842 case XML_CHAR_ENCODING_8859_9:
1843 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1844 if (handler != NULL) return(handler);
1845 break;
1846
1847
1848 case XML_CHAR_ENCODING_2022_JP:
1849 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1850 if (handler != NULL) return(handler);
1851 break;
1852 case XML_CHAR_ENCODING_SHIFT_JIS:
1853 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1854 if (handler != NULL) return(handler);
1855 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1856 if (handler != NULL) return(handler);
1857 handler = xmlFindCharEncodingHandler("Shift_JIS");
1858 if (handler != NULL) return(handler);
1859 break;
1860 case XML_CHAR_ENCODING_EUC_JP:
1861 handler = xmlFindCharEncodingHandler("EUC-JP");
1862 if (handler != NULL) return(handler);
1863 break;
1864 default:
1865 break;
1866 }
1867
1868#ifdef DEBUG_ENCODING
1869 xmlGenericError(xmlGenericErrorContext,
1870 "No handler found for encoding %d\n", enc);
1871#endif
1872 return(NULL);
1873}
1874
1875/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001876 * xmlFindCharEncodingHandler:
1877 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001878 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001879 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001880 *
1881 * Returns the handler or NULL if not found
1882 */
1883xmlCharEncodingHandlerPtr
1884xmlFindCharEncodingHandler(const char *name) {
1885 const char *nalias;
1886 const char *norig;
1887 xmlCharEncoding alias;
1888#ifdef LIBXML_ICONV_ENABLED
1889 xmlCharEncodingHandlerPtr enc;
1890 iconv_t icv_in, icv_out;
1891#endif /* LIBXML_ICONV_ENABLED */
1892 char upper[100];
1893 int i;
1894
1895 if (handlers == NULL) xmlInitCharEncodingHandlers();
1896 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1897 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1898
1899 /*
1900 * Do the alias resolution
1901 */
1902 norig = name;
1903 nalias = xmlGetEncodingAlias(name);
1904 if (nalias != NULL)
1905 name = nalias;
1906
1907 /*
1908 * Check first for directly registered encoding names
1909 */
1910 for (i = 0;i < 99;i++) {
1911 upper[i] = toupper(name[i]);
1912 if (upper[i] == 0) break;
1913 }
1914 upper[i] = 0;
1915
1916 for (i = 0;i < nbCharEncodingHandler; i++)
1917 if (!strcmp(upper, handlers[i]->name)) {
1918#ifdef DEBUG_ENCODING
1919 xmlGenericError(xmlGenericErrorContext,
1920 "Found registered handler for encoding %s\n", name);
1921#endif
1922 return(handlers[i]);
1923 }
1924
1925#ifdef LIBXML_ICONV_ENABLED
1926 /* check whether iconv can handle this */
1927 icv_in = iconv_open("UTF-8", name);
1928 icv_out = iconv_open(name, "UTF-8");
1929 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1930 enc = (xmlCharEncodingHandlerPtr)
1931 xmlMalloc(sizeof(xmlCharEncodingHandler));
1932 if (enc == NULL) {
1933 iconv_close(icv_in);
1934 iconv_close(icv_out);
1935 return(NULL);
1936 }
1937 enc->name = xmlMemStrdup(name);
1938 enc->input = NULL;
1939 enc->output = NULL;
1940 enc->iconv_in = icv_in;
1941 enc->iconv_out = icv_out;
1942#ifdef DEBUG_ENCODING
1943 xmlGenericError(xmlGenericErrorContext,
1944 "Found iconv handler for encoding %s\n", name);
1945#endif
1946 return enc;
1947 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1948 xmlGenericError(xmlGenericErrorContext,
1949 "iconv : problems with filters for '%s'\n", name);
1950 }
1951#endif /* LIBXML_ICONV_ENABLED */
1952
1953#ifdef DEBUG_ENCODING
1954 xmlGenericError(xmlGenericErrorContext,
1955 "No handler found for encoding %s\n", name);
1956#endif
1957
1958 /*
1959 * Fallback using the canonical names
1960 */
1961 alias = xmlParseCharEncoding(norig);
1962 if (alias != XML_CHAR_ENCODING_ERROR) {
1963 const char* canon;
1964 canon = xmlGetCharEncodingName(alias);
1965 if ((canon != NULL) && (strcmp(name, canon))) {
1966 return(xmlFindCharEncodingHandler(canon));
1967 }
1968 }
1969
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00001970 /*
1971 * If nothing was found and it is "UTF-16" then use the Little indian
1972 * version.
1973 */
1974 if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) ||
1975 (xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16")))
1976 return(xmlUTF16LEHandler);
1977
Owen Taylor3473f882001-02-23 17:55:21 +00001978 return(NULL);
1979}
1980
Daniel Veillard97ac1312001-05-30 19:14:17 +00001981/************************************************************************
1982 * *
1983 * ICONV based generic conversion functions *
1984 * *
1985 ************************************************************************/
1986
Owen Taylor3473f882001-02-23 17:55:21 +00001987#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Run one iconv() conversion step and translate its outcome into the
 * return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are set to 0 when @in is NULL.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {

    size_t icv_inlen = *inlen, icv_outlen = *outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;    /* iconv() reports failure as (size_t) -1 */

    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    if (in != NULL) {
        /* iconv() leaves the number of bytes remaining in each buffer */
        *inlen -= (int) icv_inlen;
        *outlen -= (int) icv_outlen;
    } else {
        *inlen = 0;
        *outlen = 0;
    }
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
2046#endif /* LIBXML_ICONV_ENABLED */
2047
/************************************************************************
 *                                                                      *
 *      The real API used by libxml for on-the-fly conversion           *
 *                                                                      *
 ************************************************************************/
2053
Owen Taylor3473f882001-02-23 17:55:21 +00002054/**
2055 * xmlCharEncFirstLine:
2056 * @handler: char enconding transformation data structure
2057 * @out: an xmlBuffer for the output.
2058 * @in: an xmlBuffer for the input
2059 *
2060 * Front-end for the encoding handler input function, but handle only
2061 * the very first line, i.e. limit itself to 45 chars.
2062 *
2063 * Returns the number of byte written if success, or
2064 * -1 general error
2065 * -2 if the transcoding fails (for *in is not valid utf8 string or
2066 * the result of transformation can't fit into the encoding we want), or
2067 */
2068int
2069xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2070 xmlBufferPtr in) {
2071 int ret = -2;
2072 int written;
2073 int toconv;
2074
2075 if (handler == NULL) return(-1);
2076 if (out == NULL) return(-1);
2077 if (in == NULL) return(-1);
2078
2079 written = out->size - out->use;
2080 toconv = in->use;
2081 if (toconv * 2 >= written) {
2082 xmlBufferGrow(out, toconv);
2083 written = out->size - out->use - 1;
2084 }
2085
2086 /*
2087 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2088 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002089 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00002090 */
2091 written = 45;
2092
2093 if (handler->input != NULL) {
2094 ret = handler->input(&out->content[out->use], &written,
2095 in->content, &toconv);
2096 xmlBufferShrink(in, toconv);
2097 out->use += written;
2098 out->content[out->use] = 0;
2099 }
2100#ifdef LIBXML_ICONV_ENABLED
2101 else if (handler->iconv_in != NULL) {
2102 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2103 &written, in->content, &toconv);
2104 xmlBufferShrink(in, toconv);
2105 out->use += written;
2106 out->content[out->use] = 0;
2107 if (ret == -1) ret = -3;
2108 }
2109#endif /* LIBXML_ICONV_ENABLED */
2110#ifdef DEBUG_ENCODING
2111 switch (ret) {
2112 case 0:
2113 xmlGenericError(xmlGenericErrorContext,
2114 "converted %d bytes to %d bytes of input\n",
2115 toconv, written);
2116 break;
2117 case -1:
2118 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2119 toconv, written, in->use);
2120 break;
2121 case -2:
2122 xmlGenericError(xmlGenericErrorContext,
2123 "input conversion failed due to input error\n");
2124 break;
2125 case -3:
2126 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2127 toconv, written, in->use);
2128 break;
2129 default:
2130 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2131 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002132#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002133 /*
2134 * Ignore when input buffer is not on a boundary
2135 */
2136 if (ret == -3) ret = 0;
2137 if (ret == -1) ret = 0;
2138 return(ret);
2139}
2140
2141/**
2142 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002143 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002144 * @out: an xmlBuffer for the output.
2145 * @in: an xmlBuffer for the input
2146 *
2147 * Generic front-end for the encoding handler input function
2148 *
2149 * Returns the number of byte written if success, or
2150 * -1 general error
2151 * -2 if the transcoding fails (for *in is not valid utf8 string or
2152 * the result of transformation can't fit into the encoding we want), or
2153 */
2154int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002155xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2156 xmlBufferPtr in)
2157{
Owen Taylor3473f882001-02-23 17:55:21 +00002158 int ret = -2;
2159 int written;
2160 int toconv;
2161
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002162 if (handler == NULL)
2163 return (-1);
2164 if (out == NULL)
2165 return (-1);
2166 if (in == NULL)
2167 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002168
2169 toconv = in->use;
2170 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002171 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00002172 written = out->size - out->use;
2173 if (toconv * 2 >= written) {
2174 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002175 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002176 }
2177 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002178 ret = handler->input(&out->content[out->use], &written,
2179 in->content, &toconv);
2180 xmlBufferShrink(in, toconv);
2181 out->use += written;
2182 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002183 }
2184#ifdef LIBXML_ICONV_ENABLED
2185 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002186 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2187 &written, in->content, &toconv);
2188 xmlBufferShrink(in, toconv);
2189 out->use += written;
2190 out->content[out->use] = 0;
2191 if (ret == -1)
2192 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002193 }
2194#endif /* LIBXML_ICONV_ENABLED */
2195 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002196 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002197#ifdef DEBUG_ENCODING
2198 xmlGenericError(xmlGenericErrorContext,
2199 "converted %d bytes to %d bytes of input\n",
2200 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002201#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002202 break;
2203 case -1:
2204#ifdef DEBUG_ENCODING
2205 xmlGenericError(xmlGenericErrorContext,
2206 "converted %d bytes to %d bytes of input, %d left\n",
2207 toconv, written, in->use);
2208#endif
2209 break;
2210 case -3:
2211#ifdef DEBUG_ENCODING
2212 xmlGenericError(xmlGenericErrorContext,
2213 "converted %d bytes to %d bytes of input, %d left\n",
2214 toconv, written, in->use);
2215#endif
2216 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002218 xmlGenericError(xmlGenericErrorContext,
2219 "input conversion failed due to input error\n");
2220 xmlGenericError(xmlGenericErrorContext,
2221 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2222 in->content[0], in->content[1],
2223 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00002224 }
2225 /*
2226 * Ignore when input buffer is not on a boundary
2227 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002228 if (ret == -3)
2229 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00002230 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00002231}
2232
2233/**
2234 * xmlCharEncOutFunc:
2235 * @handler: char enconding transformation data structure
2236 * @out: an xmlBuffer for the output.
2237 * @in: an xmlBuffer for the input
2238 *
2239 * Generic front-end for the encoding handler output function
2240 * a first call with @in == NULL has to be made firs to initiate the
2241 * output in case of non-stateless encoding needing to initiate their
2242 * state or the output (like the BOM in UTF16).
2243 * In case of UTF8 sequence conversion errors for the given encoder,
2244 * the content will be automatically remapped to a CharRef sequence.
2245 *
2246 * Returns the number of byte written if success, or
2247 * -1 general error
2248 * -2 if the transcoding fails (for *in is not valid utf8 string or
2249 * the result of transformation can't fit into the encoding we want), or
2250 */
2251int
2252xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2253 xmlBufferPtr in) {
2254 int ret = -2;
2255 int written;
2256 int writtentot = 0;
2257 int toconv;
2258 int output = 0;
2259
2260 if (handler == NULL) return(-1);
2261 if (out == NULL) return(-1);
2262
2263retry:
2264
2265 written = out->size - out->use;
2266
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002267 if (written > 0)
2268 written--; /* Gennady: count '/0' */
2269
Owen Taylor3473f882001-02-23 17:55:21 +00002270 /*
2271 * First specific handling of in = NULL, i.e. the initialization call
2272 */
2273 if (in == NULL) {
2274 toconv = 0;
2275 if (handler->output != NULL) {
2276 ret = handler->output(&out->content[out->use], &written,
2277 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002278 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002279 out->use += written;
2280 out->content[out->use] = 0;
2281 }
Owen Taylor3473f882001-02-23 17:55:21 +00002282 }
2283#ifdef LIBXML_ICONV_ENABLED
2284 else if (handler->iconv_out != NULL) {
2285 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2286 &written, NULL, &toconv);
2287 out->use += written;
2288 out->content[out->use] = 0;
2289 }
2290#endif /* LIBXML_ICONV_ENABLED */
2291#ifdef DEBUG_ENCODING
2292 xmlGenericError(xmlGenericErrorContext,
2293 "initialized encoder\n");
2294#endif
2295 return(0);
2296 }
2297
2298 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002299 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002300 */
2301 toconv = in->use;
2302 if (toconv == 0)
2303 return(0);
2304 if (toconv * 2 >= written) {
2305 xmlBufferGrow(out, toconv * 2);
2306 written = out->size - out->use - 1;
2307 }
2308 if (handler->output != NULL) {
2309 ret = handler->output(&out->content[out->use], &written,
2310 in->content, &toconv);
2311 xmlBufferShrink(in, toconv);
2312 out->use += written;
2313 writtentot += written;
2314 out->content[out->use] = 0;
2315 }
2316#ifdef LIBXML_ICONV_ENABLED
2317 else if (handler->iconv_out != NULL) {
2318 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2319 &written, in->content, &toconv);
2320 xmlBufferShrink(in, toconv);
2321 out->use += written;
2322 writtentot += written;
2323 out->content[out->use] = 0;
2324 if (ret == -1) {
2325 if (written > 0) {
2326 /*
2327 * Can be a limitation of iconv
2328 */
2329 goto retry;
2330 }
2331 ret = -3;
2332 }
2333 }
2334#endif /* LIBXML_ICONV_ENABLED */
2335 else {
2336 xmlGenericError(xmlGenericErrorContext,
2337 "xmlCharEncOutFunc: no output function !\n");
2338 return(-1);
2339 }
2340
2341 if (ret >= 0) output += ret;
2342
2343 /*
2344 * Attempt to handle error cases
2345 */
2346 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002347 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002348#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002349 xmlGenericError(xmlGenericErrorContext,
2350 "converted %d bytes to %d bytes of output\n",
2351 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002352#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002353 break;
2354 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002355#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002356 xmlGenericError(xmlGenericErrorContext,
2357 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002358#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002359 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002360 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002361#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002362 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2363 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002364#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002365 break;
2366 case -2: {
2367 int len = in->use;
2368 const xmlChar *utf = (const xmlChar *) in->content;
2369 int cur;
2370
2371 cur = xmlGetUTF8Char(utf, &len);
2372 if (cur > 0) {
2373 xmlChar charref[20];
2374
2375#ifdef DEBUG_ENCODING
2376 xmlGenericError(xmlGenericErrorContext,
2377 "handling output conversion error\n");
2378 xmlGenericError(xmlGenericErrorContext,
2379 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2380 in->content[0], in->content[1],
2381 in->content[2], in->content[3]);
2382#endif
2383 /*
2384 * Removes the UTF8 sequence, and replace it by a charref
2385 * and continue the transcoding phase, hoping the error
2386 * did not mangle the encoder state.
2387 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002388 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002389 xmlBufferShrink(in, len);
2390 xmlBufferAddHead(in, charref, -1);
2391
2392 goto retry;
2393 } else {
2394 xmlGenericError(xmlGenericErrorContext,
2395 "output conversion failed due to conv error\n");
2396 xmlGenericError(xmlGenericErrorContext,
2397 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2398 in->content[0], in->content[1],
2399 in->content[2], in->content[3]);
2400 in->content[0] = ' ';
2401 }
2402 break;
2403 }
2404 }
2405 return(ret);
2406}
2407
2408/**
2409 * xmlCharEncCloseFunc:
2410 * @handler: char enconding transformation data structure
2411 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002412 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002413 *
2414 * Returns 0 if success, or -1 in case of error
2415 */
2416int
2417xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2418 int ret = 0;
2419 if (handler == NULL) return(-1);
2420 if (handler->name == NULL) return(-1);
2421#ifdef LIBXML_ICONV_ENABLED
2422 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002423 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002424 * and the associated icon resources.
2425 */
2426 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2427 if (handler->name != NULL)
2428 xmlFree(handler->name);
2429 handler->name = NULL;
2430 if (handler->iconv_out != NULL) {
2431 if (iconv_close(handler->iconv_out))
2432 ret = -1;
2433 handler->iconv_out = NULL;
2434 }
2435 if (handler->iconv_in != NULL) {
2436 if (iconv_close(handler->iconv_in))
2437 ret = -1;
2438 handler->iconv_in = NULL;
2439 }
2440 xmlFree(handler);
2441 }
2442#endif /* LIBXML_ICONV_ENABLED */
2443#ifdef DEBUG_ENCODING
2444 if (ret)
2445 xmlGenericError(xmlGenericErrorContext,
2446 "failed to close the encoding handler\n");
2447 else
2448 xmlGenericError(xmlGenericErrorContext,
2449 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002450#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002451
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(ret);
2453}
2454
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002455#ifndef LIBXML_ICONV_ENABLED
2456#ifdef LIBXML_ISO8859X_ENABLED
2457
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * Layout of @xlattable as used below: bytes 0..31 are indexed by the low
 * 5 bits of a 2-byte lead, bytes 32..47 by the low 4 bits of a 3-byte
 * lead; each gives the number of a 64-byte block starting at offset 48,
 * indexed by the low 6 bits of the next byte. A final value of 0 means
 * the character is not in the target character set.
 *
 * Returns 0 if success (conversion also stops, with 0, when @out is
 *     full), -2 if the transcoding fails (invalid UTF-8, or character
 *     not in the target set), or -3 if the input ends in the middle of
 *     a sequence.
 * The value of @inlen after return is the number of octets consumed,
 * always a whole number of characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen,
               unsigned char const *xlattable) {
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* end of the last input character that was fully converted */
    const unsigned char* processed = in;
    int ret = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    /* every input character yields exactly one output byte */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                                     xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(ret);
}
2572
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars, or NULL
 * @inlen:  the length of @in
 * @unicodetable: for each byte 0x80..0xFF (index byte - 0x80) the
 *                corresponding code point, 0 meaning undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when the
 * next character does not fit in what is left of @out.
 *
 * Returns 0 if success, or -1 if an undefined code point is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen,
               unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return (0);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        /* only read a byte once it is known to be inside the input */
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /* a non ASCII char needs at least two output bytes */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* three bytes are written here, check for all of them */
                if (outend - out < 3)
                    break;
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (0);
}
2632
2633
/************************************************************************
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
 ************************************************************************/
2637
/*
 * ISO-8859-2 to Unicode: entry i holds the code point for byte 0x80 + i.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2656
/*
 * Unicode to ISO-8859-2: 48 bytes of first level index followed by
 * 6 blocks of 64 bytes; block 0 is all zero, 0 meaning "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2686
/*
 * ISO-8859-3 to Unicode: entry i holds the code point for byte 0x80 + i,
 * 0x0000 marking a byte with no mapping.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2705
/*
 * Unicode to ISO-8859-3: 48 bytes of first level index followed by
 * 7 blocks of 64 bytes; block 0 is all zero, 0 meaning "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2739
/*
 * ISO-8859-4 to Unicode: entry i holds the code point for byte 0x80 + i.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2758
/*
 * Unicode to ISO-8859-4: 48 bytes of first level index followed by
 * 6 blocks of 64 bytes; block 0 is all zero, 0 meaning "no mapping".
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2788
/*
 * ISO-8859-5 to Unicode: entry i holds the code point for byte 0x80 + i.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2807
2808static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2809 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2814 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2816 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2817 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2818 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2819 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2820 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2821 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2822 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2823 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2824 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2825 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2826 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2830 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2833 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2835 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836};
2837
2838static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2839 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2840 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2841 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2842 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2843 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2844 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2845 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2846 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2847 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2848 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2849 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2850 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2851 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2852 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2853 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2854 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2855};
2856
2857static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2858 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2866 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2867 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2868 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2869 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2870 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2871 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2872 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2873 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2874 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2875 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2876 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2877 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2878 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881};
2882
2883static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2884 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2885 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2886 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2887 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2888 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2889 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2890 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2891 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2892 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2893 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2894 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2895 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2896 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2897 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2898 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2899 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2900};
2901
2902static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2903 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2911 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2912 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2913 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2914 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2919 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2920 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2921 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2922 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2923 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2924 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2925 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2926 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2927 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2928 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2929 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2930 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2931 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2932 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934};
2935
2936static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2937 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2938 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2939 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2940 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2941 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2942 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2943 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2944 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2945 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2946 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2947 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2948 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2949 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2950 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2951 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2952 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2953};
2954
2955static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2956 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2964 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2965 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2966 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2967 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2970 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2972 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2973 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2974 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2975 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2980 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2985 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987};
2988
2989static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2990 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2991 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2992 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2993 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2994 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2995 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2996 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2997 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2998 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2999 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3000 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3001 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3002 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3003 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3004 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3005 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3006};
3007
3008static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3009 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3017 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3018 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3019 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3020 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3021 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3022 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3023 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3024 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3028 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3029 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3030 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032};
3033
3034static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3035 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3036 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3037 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3038 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3039 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3040 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3041 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3042 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3043 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3044 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3045 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3046 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3047 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3048 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3049 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3050 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3051};
3052
3053static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3054 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3055 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3056 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3062 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3063 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3064 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3066 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3068 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3069 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3072 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3073 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3079 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3080 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3081 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3082 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3083 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3084 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3085};
3086
3087static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3088 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3089 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3090 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3091 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3092 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3093 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3094 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3095 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3096 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3097 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3098 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3099 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3100 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3101 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3102 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3103 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3104};
3105
3106static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3107 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3115 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3116 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3122 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3123 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3124 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3125 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3126 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3131 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134};
3135
3136static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3137 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3138 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3139 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3140 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3141 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3142 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3143 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3144 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3145 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3146 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3147 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3148 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3149 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3150 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3151 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3152 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3153};
3154
3155static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3156 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3164 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3165 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3166 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3167 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3173 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3176 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3177 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3178 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3179 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3180 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3181 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3182 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3183 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3184 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3185 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3186 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3187};
3188
3189static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3190 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3191 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3192 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3193 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3194 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3195 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3196 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3197 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3198 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3199 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3200 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3201 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3202 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3203 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3204 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3205 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3206};
3207
3208static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3209 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3217 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3218 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3219 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3224 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3244 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3247 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3249 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3250 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3251 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3252};
3253
3254static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3255 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3256 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3257 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3258 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3259 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3260 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3261 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3262 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3263 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3264 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3265 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3266 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3267 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3268 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3269 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3270 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3271};
3272
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Lookup data passed to UTF8ToISO8859x() by UTF8ToISO8859_15().  As the
 * size expression states, it is a 48-byte index area followed by six
 * 64-byte blocks.
 *
 * NOTE(review): judging from the data, index bytes are block numbers,
 * block bytes are ISO-8859-15 codes and 0x00 means "no mapping"; the
 * Unicode ranges noted on the blocks below are likewise inferred.
 * Confirm against UTF8ToISO8859x() before relying on them.
 */
static const unsigned char xmltranscodetable_ISO8859_15[48 + 6 * 64] = {
    /* index area, bytes 0..31 */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* index area, bytes 32..47 */
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 (looks like U+0080..U+00BF) */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* block 2 (looks like a second-level index) */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 (looks like U+2080..U+20BF, euro sign -> 0xa4) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 (looks like U+0140..U+017F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* block 5 (looks like U+00C0..U+00FF) */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3302
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 Unicode code points, passed to ISO8859xToUTF8() by
 * ISO8859_16ToUTF8().
 *
 * NOTE(review): the row labels assume entry i describes byte 0x80 + i
 * (the first 32 entries are 0x0080..0x009f, which fits) -- confirm
 * against ISO8859xToUTF8().
 */
static const unsigned short xmlunicodetable_ISO8859_16[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 80-87 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 88-8f */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 90-97 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 98-9f */
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, /* a0-a7 */
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, /* a8-af */
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, /* b0-b7 */
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, /* b8-bf */
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, /* c0-c7 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* c8-cf */
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, /* d0-d7 */
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, /* d8-df */
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, /* e0-e7 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* e8-ef */
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, /* f0-f7 */
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, /* f8-ff */
};
3321
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Lookup data passed to UTF8ToISO8859x() by UTF8ToISO8859_16().  As the
 * size expression states, it is a 48-byte index area followed by nine
 * 64-byte blocks.
 *
 * NOTE(review): judging from the data, index bytes are block numbers,
 * block bytes are ISO-8859-16 codes and 0x00 means "no mapping"; the
 * Unicode ranges noted on the blocks below are likewise inferred.
 * Confirm against UTF8ToISO8859x() before relying on them.
 */
static const unsigned char xmltranscodetable_ISO8859_16[48 + 9 * 64] = {
    /* index area, bytes 0..31 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* index area, bytes 32..47 */
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 (looks like U+0080..U+00BF) */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* block 2 (looks like U+0100..U+013F) */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 (looks like U+0140..U+017F) */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* block 4 (looks like a second-level index) */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 (looks like U+2080..U+20BF, euro sign -> 0xa4) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6 (looks like U+2000..U+203F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7 (looks like U+0200..U+023F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 8 (looks like U+00C0..U+00FF) */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3363
3364
3365/*
3366 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3367 */
3368
3369static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3370 const unsigned char* in, int *inlen) {
3371 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3372}
3373static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3374 const unsigned char* in, int *inlen) {
3375 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3376}
3377
3378static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3379 const unsigned char* in, int *inlen) {
3380 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3381}
3382static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3383 const unsigned char* in, int *inlen) {
3384 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3385}
3386
3387static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3388 const unsigned char* in, int *inlen) {
3389 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3390}
3391static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3392 const unsigned char* in, int *inlen) {
3393 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3394}
3395
3396static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3397 const unsigned char* in, int *inlen) {
3398 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3399}
3400static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3401 const unsigned char* in, int *inlen) {
3402 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3403}
3404
3405static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3406 const unsigned char* in, int *inlen) {
3407 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3408}
3409static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3410 const unsigned char* in, int *inlen) {
3411 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3412}
3413
3414static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3415 const unsigned char* in, int *inlen) {
3416 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3417}
3418static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3419 const unsigned char* in, int *inlen) {
3420 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3421}
3422
3423static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3424 const unsigned char* in, int *inlen) {
3425 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3426}
3427static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3428 const unsigned char* in, int *inlen) {
3429 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3430}
3431
3432static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3433 const unsigned char* in, int *inlen) {
3434 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3435}
3436static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3437 const unsigned char* in, int *inlen) {
3438 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3439}
3440
3441static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3442 const unsigned char* in, int *inlen) {
3443 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3444}
3445static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3446 const unsigned char* in, int *inlen) {
3447 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3448}
3449
3450static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3451 const unsigned char* in, int *inlen) {
3452 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3453}
3454static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3455 const unsigned char* in, int *inlen) {
3456 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3457}
3458
3459static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3460 const unsigned char* in, int *inlen) {
3461 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3462}
3463static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3464 const unsigned char* in, int *inlen) {
3465 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3466}
3467
3468static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3469 const unsigned char* in, int *inlen) {
3470 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3471}
3472static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3473 const unsigned char* in, int *inlen) {
3474 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3475}
3476
3477static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3478 const unsigned char* in, int *inlen) {
3479 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3480}
3481static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3482 const unsigned char* in, int *inlen) {
3483 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3484}
3485
3486static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3487 const unsigned char* in, int *inlen) {
3488 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3489}
3490static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3491 const unsigned char* in, int *inlen) {
3492 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3493}
3494
3495static void
3496xmlRegisterCharEncodingHandlersISO8859x (void) {
3497 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3498 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3499 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3500 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3501 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3502 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3503 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3504 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3505 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3506 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3507 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3508 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3509 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3510 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3511}
3512
3513#endif
3514#endif
3515
3516