/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
/*
 * One entry of the encoding alias table: @alias is the string that is
 * looked up, @name is the encoding name returned for it.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/*
 * The alias table itself (NULL until something is registered), the number
 * of entries in use, and - presumably - the number of entries allocated.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
60
#ifdef LIBXML_ICONV_ENABLED
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
/* Only needed without iconv: forward declaration of the ISO-8859-x setup. */
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Non-zero when 16-bit values are stored least significant byte first on
 * this machine.  Defaults to 1; presumably corrected during initialisation
 * elsewhere in this file (TODO confirm).
 */
static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
/************************************************************************
 *                                                                      *
 *              Conversions To/From UTF8 encoding                       *
 *                                                                      *
 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  Every ASCII char is its own UTF-8 encoding, so
 * this is a byte copy that stops at the first byte outside 0x00-0x7F
 * or when @out is full.
 *
 * Returns the number of bytes written to @out, or -1 if an argument is
 *     NULL or if a byte >= 0x80 is met in the input.
 * The value of @inlen after return is the number of octets consumed;
 *     when a byte >= 0x80 is met it counts only the bytes before it.
 * The value of @outlen after return is the number of octets produced.
 * Neither length is touched when an argument is NULL.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /* one output byte per input byte: no extra head-room is required */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.  Only single byte sequences (0x00-0x7F) can be
 * represented; any complete multi-byte sequence is either a non-ASCII
 * character, malformed, or an overlong encoding and makes the
 * conversion fail.  A multi-byte sequence cut off by the end of @in is
 * left unconsumed so that the caller can retry with more data.
 *
 * Returns the number of bytes written to @out (0 when @in is NULL, which
 *     is the initialization call), -2 if the transcoding fails, or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it counts only the bytes before the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* outend;
    const unsigned char* inend;
    unsigned int d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in;
        if (d >= 0x80) {
            if ((d < 0xC0) || (d >= 0xF8))
                trailing = 0;       /* stray trailing byte or unusable lead */
            else if (d < 0xE0)
                trailing = 1;
            else if (d < 0xF0)
                trailing = 2;
            else
                trailing = 3;

            /* sequence cut by the end of the input: keep it for later */
            if (inend - in <= trailing)
                break;

            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = in - instart;
            return(-2);
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) d;
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000215#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000216
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied, bytes from 0x80 up
 * are expanded to a two byte sequence.  Conversion stops when @in is
 * exhausted or when the next char does not fit in @out anymore.
 *
 * Returns the number of bytes written to @out, or -1 if an argument is
 *     NULL (the lengths are left untouched in that case).
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instop;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);
    instop = inend;

    /*
     * Room for a two byte sequence is needed to enter the loop; the test
     * is done on the distance so that no pointer before @out is formed
     * when *outlen is 0.
     */
    while ((in < inend) && (outend - out > 1)) {
        if (*in >= 0x80) {
            *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
        /* never copy more plain ASCII than the output can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* a single byte may be left in the output: enough for one ASCII char */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
265
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as both
 * buffers allow, without looking at their content.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     if the smaller of the two lengths is negative (the lengths are left
 *     untouched in both cases).
 * The values of *outlen and *inlenb after a successful return are both
 *     the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int len;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy what fits: the smaller of the two sizes */
    len = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (len < 0)
        return(-1);

    memcpy(out, inb, len);

    *outlen = len;
    *inlenb = len;
    return(*outlen);
}
301
Daniel Veillarde72c7562002-05-31 09:47:30 +0000302
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000303#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.  Only U+0000-U+00FF can be represented: single
 * bytes below 0x80 and two byte sequences led by 0xC2 or 0xC3.  Any
 * other complete sequence is a character above U+00FF, malformed, or an
 * overlong encoding and makes the conversion fail.  A sequence cut off
 * by the end of @in is left unconsumed so the caller can retry with more
 * data.
 *
 * Returns the number of bytes written to @out (0 when @in is NULL, which
 *     is the initialization call), -2 if the transcoding fails, or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it counts only the bytes before the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else {
            /* trailing byte in leading position, or unusable lead byte */
            if ((d < 0xC0) || (d >= 0xF8))
                goto invalid;
            trailing = (d < 0xE0) ? 1 : ((d < 0xF0) ? 2 : 3);

            /* sequence cut by the end of the input: keep it for later */
            if (inend - in <= trailing)
                break;

            /*
             * U+0080-U+00FF are exactly the two byte sequences starting
             * with 0xC2 or 0xC3; no chance for anything else in IsoLat1
             */
            if ((d != 0xC2) && (d != 0xC3))
                goto invalid;
            if ((in[1] & 0xC0) != 0x80)
                goto invalid;
            c = ((d & 0x1F) << 6) | (in[1] & 0x3F);
        }

        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        in += trailing + 1;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = in - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000392#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000393
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes; a trailing odd byte is ignored
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the byte
 * order of the machine.  A character is only consumed once its whole
 * UTF-8 encoding fits in @out; a high surrogate whose partner has not
 * arrived yet is left unconsumed.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a low
 *     surrogate).
 * The value of *inlenb after return is the number of octets consumed;
 *     on -2 it counts only the bytes before the offending pair.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    inend = inb + (*inlenb / 2) * 2;    /* whole 16-bit units only */
    while (in < inend) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (next >= inend)           /* pair split over two buffers */
                break;
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            if (outend - out < 4)
                break;
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
481
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000482#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  The output is written byte by byte, so @outb
 * needs no particular alignment and the result does not depend on the
 * byte order of the machine.  No Byte Order Mark is produced.  A
 * sequence cut off by the end of @in, or a character that does not fit
 * in @outb anymore, is left unconsumed.
 *
 * Returns the number of bytes written (0 when @in is NULL, which is the
 *     initialization call), -1 if @outb, @outlen or @inlen is NULL, or -2
 *     if the transcoding failed (bad lead or continuation byte, or a
 *     code point above U+10FFFF).
 * The value of *inlen after return is the number of octets consumed;
 *     on -2 it counts only the bytes before the offending sequence.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* the lengths are only read once the pointers are known to be valid */
    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;    /* whole 16-bit units only */
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;                /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: keep it for later */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);           /* high surrogate */
            *out++ = (unsigned char) (d & 0xFF);
            *out++ = (unsigned char) (d >> 8);
            d = 0xDC00 | (c & 0x03FF);        /* low surrogate */
            *out++ = (unsigned char) (d & 0xFF);
            *out++ = (unsigned char) (d >> 8);
        } else {
            goto invalid;                 /* beyond the reach of UTF-16 */
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
590
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.  When @in is NULL (initialization call) the two
 * bytes 0xFF 0xFE, i.e. the little endian Byte Order Mark, are written
 * if @outb can hold them; everything else is delegated to
 * UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or whatever UTF8ToUTF16LE() returns for a non-NULL @in.
 * On the initialization call *inlen is set to 0 and *outlen to the
 *     number of bytes written (2 or 0).
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        /* not even room for the BOM: nothing written */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
William M. Brack030a7a12004-02-10 12:48:57 +0000629#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000630
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes; a trailing odd byte is ignored
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the byte
 * order of the machine.  A character is only consumed once its whole
 * UTF-8 encoding fits in @out, so the output never ends in the middle of
 * a sequence; a high surrogate whose partner has not arrived yet is left
 * unconsumed.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a low
 *     surrogate).
 * The value of *inlenb after return is the number of octets consumed;
 *     on -2 it counts only the bytes before the offending pair.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    inend = inb + (*inlenb / 2) * 2;    /* whole 16-bit units only */
    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (next >= inend)           /* pair split over two buffers */
                break;
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            if (outend - out < 4)
                break;
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
722
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000723#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  The output is written byte by byte, so @outb
 * needs no particular alignment and the result does not depend on the
 * byte order of the machine.  No Byte Order Mark is produced.  A
 * sequence cut off by the end of @in, or a character that does not fit
 * in @outb anymore, is left unconsumed.
 *
 * Returns the number of bytes written (0 when @in is NULL, which is the
 *     initialization call), -1 if @outb, @outlen or @inlen is NULL, or -2
 *     if the transcoding failed (bad lead or continuation byte, or a
 *     code point above U+10FFFF).
 * The value of *inlen after return is the number of octets consumed;
 *     on -2 it counts only the bytes before the offending sequence.
 * The value of *outlen after return is the number of octets produced,
 *     on every path including the -2 one.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* the lengths are only read once the pointers are known to be valid */
    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;    /* whole 16-bit units only */
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto invalid;                /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: keep it for later */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);           /* high surrogate */
            *out++ = (unsigned char) (d >> 8);
            *out++ = (unsigned char) (d & 0xFF);
            d = 0xDC00 | (c & 0x03FF);        /* low surrogate */
            *out++ = (unsigned char) (d >> 8);
            *out++ = (unsigned char) (d & 0xFF);
        } else {
            goto invalid;                 /* beyond the reach of UTF-16 */
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* out is a byte pointer, so this is a byte count like everywhere else */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000828#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000829
/************************************************************************
 *                                                                      *
 *              Generic encoding handling routines                      *
 *                                                                      *
 ************************************************************************/
835
Owen Taylor3473f882001-02-23 17:55:21 +0000836/**
837 * xmlDetectCharEncoding:
838 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000839 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000840 * @len: pointer to the length of the buffer
841 *
842 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000843 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000844 *
845 * Returns one of the XML_CHAR_ENCODING_... values.
846 */
847xmlCharEncoding
848xmlDetectCharEncoding(const unsigned char* in, int len)
849{
Daniel Veillardce682bc2004-11-05 17:22:25 +0000850 if (in == NULL)
851 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000852 if (len >= 4) {
853 if ((in[0] == 0x00) && (in[1] == 0x00) &&
854 (in[2] == 0x00) && (in[3] == 0x3C))
855 return(XML_CHAR_ENCODING_UCS4BE);
856 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
857 (in[2] == 0x00) && (in[3] == 0x00))
858 return(XML_CHAR_ENCODING_UCS4LE);
859 if ((in[0] == 0x00) && (in[1] == 0x00) &&
860 (in[2] == 0x3C) && (in[3] == 0x00))
861 return(XML_CHAR_ENCODING_UCS4_2143);
862 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
863 (in[2] == 0x00) && (in[3] == 0x00))
864 return(XML_CHAR_ENCODING_UCS4_3412);
865 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
866 (in[2] == 0xA7) && (in[3] == 0x94))
867 return(XML_CHAR_ENCODING_EBCDIC);
868 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
869 (in[2] == 0x78) && (in[3] == 0x6D))
870 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000871 /*
872 * Although not part of the recommendation, we also
873 * attempt an "auto-recognition" of UTF-16LE and
874 * UTF-16BE encodings.
875 */
876 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
877 (in[2] == 0x3F) && (in[3] == 0x00))
878 return(XML_CHAR_ENCODING_UTF16LE);
879 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
880 (in[2] == 0x00) && (in[3] == 0x3F))
881 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000882 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000883 if (len >= 3) {
884 /*
885 * Errata on XML-1.0 June 20 2001
886 * We now allow an UTF8 encoded BOM
887 */
888 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
889 (in[2] == 0xBF))
890 return(XML_CHAR_ENCODING_UTF8);
891 }
William M. Brackf9415e42003-11-28 09:39:10 +0000892 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000893 if (len >= 2) {
894 if ((in[0] == 0xFE) && (in[1] == 0xFF))
895 return(XML_CHAR_ENCODING_UTF16BE);
896 if ((in[0] == 0xFF) && (in[1] == 0xFE))
897 return(XML_CHAR_ENCODING_UTF16LE);
898 }
899 return(XML_CHAR_ENCODING_NONE);
900}
901
902/**
903 * xmlCleanupEncodingAliases:
904 *
905 * Unregisters all aliases
906 */
907void
908xmlCleanupEncodingAliases(void) {
909 int i;
910
911 if (xmlCharEncodingAliases == NULL)
912 return;
913
914 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
915 if (xmlCharEncodingAliases[i].name != NULL)
916 xmlFree((char *) xmlCharEncodingAliases[i].name);
917 if (xmlCharEncodingAliases[i].alias != NULL)
918 xmlFree((char *) xmlCharEncodingAliases[i].alias);
919 }
920 xmlCharEncodingAliasesNb = 0;
921 xmlCharEncodingAliasesMax = 0;
922 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000923 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000924}
925
926/**
927 * xmlGetEncodingAlias:
928 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
929 *
930 * Lookup an encoding name for the given alias.
931 *
William M. Brackf9415e42003-11-28 09:39:10 +0000932 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000933 */
934const char *
935xmlGetEncodingAlias(const char *alias) {
936 int i;
937 char upper[100];
938
939 if (alias == NULL)
940 return(NULL);
941
942 if (xmlCharEncodingAliases == NULL)
943 return(NULL);
944
945 for (i = 0;i < 99;i++) {
946 upper[i] = toupper(alias[i]);
947 if (upper[i] == 0) break;
948 }
949 upper[i] = 0;
950
951 /*
952 * Walk down the list looking for a definition of the alias
953 */
954 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
955 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
956 return(xmlCharEncodingAliases[i].name);
957 }
958 }
959 return(NULL);
960}
961
962/**
963 * xmlAddEncodingAlias:
964 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
965 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
966 *
William M. Brackf9415e42003-11-28 09:39:10 +0000967 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000968 * will be overwritten.
969 *
970 * Returns 0 in case of success, -1 in case of error
971 */
972int
973xmlAddEncodingAlias(const char *name, const char *alias) {
974 int i;
975 char upper[100];
976
977 if ((name == NULL) || (alias == NULL))
978 return(-1);
979
980 for (i = 0;i < 99;i++) {
981 upper[i] = toupper(alias[i]);
982 if (upper[i] == 0) break;
983 }
984 upper[i] = 0;
985
986 if (xmlCharEncodingAliases == NULL) {
987 xmlCharEncodingAliasesNb = 0;
988 xmlCharEncodingAliasesMax = 20;
989 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
990 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
991 if (xmlCharEncodingAliases == NULL)
992 return(-1);
993 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
994 xmlCharEncodingAliasesMax *= 2;
995 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
996 xmlRealloc(xmlCharEncodingAliases,
997 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
998 }
999 /*
1000 * Walk down the list looking for a definition of the alias
1001 */
1002 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1003 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1004 /*
1005 * Replace the definition.
1006 */
1007 xmlFree((char *) xmlCharEncodingAliases[i].name);
1008 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1009 return(0);
1010 }
1011 }
1012 /*
1013 * Add the definition
1014 */
1015 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1016 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1017 xmlCharEncodingAliasesNb++;
1018 return(0);
1019}
1020
1021/**
1022 * xmlDelEncodingAlias:
1023 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1024 *
1025 * Unregisters an encoding alias @alias
1026 *
1027 * Returns 0 in case of success, -1 in case of error
1028 */
1029int
1030xmlDelEncodingAlias(const char *alias) {
1031 int i;
1032
1033 if (alias == NULL)
1034 return(-1);
1035
1036 if (xmlCharEncodingAliases == NULL)
1037 return(-1);
1038 /*
1039 * Walk down the list looking for a definition of the alias
1040 */
1041 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1042 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1043 xmlFree((char *) xmlCharEncodingAliases[i].name);
1044 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1045 xmlCharEncodingAliasesNb--;
1046 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1047 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1048 return(0);
1049 }
1050 }
1051 return(-1);
1052}
1053
1054/**
1055 * xmlParseCharEncoding:
1056 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1057 *
William M. Brackf9415e42003-11-28 09:39:10 +00001058 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001059 * that the comparison is case insensitive accordingly to the section
1060 * [XML] 4.3.3 Character Encoding in Entities.
1061 *
1062 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1063 * if not recognized.
1064 */
1065xmlCharEncoding
1066xmlParseCharEncoding(const char* name)
1067{
1068 const char *alias;
1069 char upper[500];
1070 int i;
1071
1072 if (name == NULL)
1073 return(XML_CHAR_ENCODING_NONE);
1074
1075 /*
1076 * Do the alias resolution
1077 */
1078 alias = xmlGetEncodingAlias(name);
1079 if (alias != NULL)
1080 name = alias;
1081
1082 for (i = 0;i < 499;i++) {
1083 upper[i] = toupper(name[i]);
1084 if (upper[i] == 0) break;
1085 }
1086 upper[i] = 0;
1087
1088 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1089 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1090 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1091
1092 /*
1093 * NOTE: if we were able to parse this, the endianness of UTF16 is
1094 * already found and in use
1095 */
1096 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1097 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1098
1099 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1100 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1101 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1102
1103 /*
1104 * NOTE: if we were able to parse this, the endianness of UCS4 is
1105 * already found and in use
1106 */
1107 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1108 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1109 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1110
1111
1112 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1113 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1114 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1115
1116 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1117 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1118 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1119
1120 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1121 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1122 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1123 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1124 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1125 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1126 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1127
1128 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1129 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1130 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1131
1132#ifdef DEBUG_ENCODING
1133 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1134#endif
1135 return(XML_CHAR_ENCODING_ERROR);
1136}
1137
1138/**
1139 * xmlGetCharEncodingName:
1140 * @enc: the encoding
1141 *
1142 * The "canonical" name for XML encoding.
1143 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1144 * Section 4.3.3 Character Encoding in Entities
1145 *
1146 * Returns the canonical name for the given encoding
1147 */
1148
1149const char*
1150xmlGetCharEncodingName(xmlCharEncoding enc) {
1151 switch (enc) {
1152 case XML_CHAR_ENCODING_ERROR:
1153 return(NULL);
1154 case XML_CHAR_ENCODING_NONE:
1155 return(NULL);
1156 case XML_CHAR_ENCODING_UTF8:
1157 return("UTF-8");
1158 case XML_CHAR_ENCODING_UTF16LE:
1159 return("UTF-16");
1160 case XML_CHAR_ENCODING_UTF16BE:
1161 return("UTF-16");
1162 case XML_CHAR_ENCODING_EBCDIC:
1163 return("EBCDIC");
1164 case XML_CHAR_ENCODING_UCS4LE:
1165 return("ISO-10646-UCS-4");
1166 case XML_CHAR_ENCODING_UCS4BE:
1167 return("ISO-10646-UCS-4");
1168 case XML_CHAR_ENCODING_UCS4_2143:
1169 return("ISO-10646-UCS-4");
1170 case XML_CHAR_ENCODING_UCS4_3412:
1171 return("ISO-10646-UCS-4");
1172 case XML_CHAR_ENCODING_UCS2:
1173 return("ISO-10646-UCS-2");
1174 case XML_CHAR_ENCODING_8859_1:
1175 return("ISO-8859-1");
1176 case XML_CHAR_ENCODING_8859_2:
1177 return("ISO-8859-2");
1178 case XML_CHAR_ENCODING_8859_3:
1179 return("ISO-8859-3");
1180 case XML_CHAR_ENCODING_8859_4:
1181 return("ISO-8859-4");
1182 case XML_CHAR_ENCODING_8859_5:
1183 return("ISO-8859-5");
1184 case XML_CHAR_ENCODING_8859_6:
1185 return("ISO-8859-6");
1186 case XML_CHAR_ENCODING_8859_7:
1187 return("ISO-8859-7");
1188 case XML_CHAR_ENCODING_8859_8:
1189 return("ISO-8859-8");
1190 case XML_CHAR_ENCODING_8859_9:
1191 return("ISO-8859-9");
1192 case XML_CHAR_ENCODING_2022_JP:
1193 return("ISO-2022-JP");
1194 case XML_CHAR_ENCODING_SHIFT_JIS:
1195 return("Shift-JIS");
1196 case XML_CHAR_ENCODING_EUC_JP:
1197 return("EUC-JP");
1198 case XML_CHAR_ENCODING_ASCII:
1199 return(NULL);
1200 }
1201 return(NULL);
1202}
1203
Daniel Veillard97ac1312001-05-30 19:14:17 +00001204/************************************************************************
1205 * *
1206 * Char encoding handlers *
1207 * *
1208 ************************************************************************/
1209
Owen Taylor3473f882001-02-23 17:55:21 +00001210
1211/* the size should be growable, but it's not a big deal ... */
1212#define MAX_ENCODING_HANDLERS 50
1213static xmlCharEncodingHandlerPtr *handlers = NULL;
1214static int nbCharEncodingHandler = 0;
1215
1216/*
1217 * The default is UTF-8 for XML, that's also the default used for the
1218 * parser internals, so the default encoding handler is NULL
1219 */
1220
1221static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1222
1223/**
1224 * xmlNewCharEncodingHandler:
1225 * @name: the encoding name, in UTF-8 format (ASCII actually)
1226 * @input: the xmlCharEncodingInputFunc to read that encoding
1227 * @output: the xmlCharEncodingOutputFunc to write that encoding
1228 *
1229 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001230 *
Owen Taylor3473f882001-02-23 17:55:21 +00001231 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1232 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001233xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001234xmlNewCharEncodingHandler(const char *name,
1235 xmlCharEncodingInputFunc input,
1236 xmlCharEncodingOutputFunc output) {
1237 xmlCharEncodingHandlerPtr handler;
1238 const char *alias;
1239 char upper[500];
1240 int i;
1241 char *up = 0;
1242
1243 /*
1244 * Do the alias resolution
1245 */
1246 alias = xmlGetEncodingAlias(name);
1247 if (alias != NULL)
1248 name = alias;
1249
1250 /*
1251 * Keep only the uppercase version of the encoding.
1252 */
1253 if (name == NULL) {
1254 xmlGenericError(xmlGenericErrorContext,
1255 "xmlNewCharEncodingHandler : no name !\n");
1256 return(NULL);
1257 }
1258 for (i = 0;i < 499;i++) {
1259 upper[i] = toupper(name[i]);
1260 if (upper[i] == 0) break;
1261 }
1262 upper[i] = 0;
1263 up = xmlMemStrdup(upper);
1264 if (up == NULL) {
1265 xmlGenericError(xmlGenericErrorContext,
1266 "xmlNewCharEncodingHandler : out of memory !\n");
1267 return(NULL);
1268 }
1269
1270 /*
1271 * allocate and fill-up an handler block.
1272 */
1273 handler = (xmlCharEncodingHandlerPtr)
1274 xmlMalloc(sizeof(xmlCharEncodingHandler));
1275 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001276 xmlFree(up);
Owen Taylor3473f882001-02-23 17:55:21 +00001277 xmlGenericError(xmlGenericErrorContext,
1278 "xmlNewCharEncodingHandler : out of memory !\n");
1279 return(NULL);
1280 }
1281 handler->input = input;
1282 handler->output = output;
1283 handler->name = up;
1284
1285#ifdef LIBXML_ICONV_ENABLED
1286 handler->iconv_in = NULL;
1287 handler->iconv_out = NULL;
1288#endif /* LIBXML_ICONV_ENABLED */
1289
1290 /*
1291 * registers and returns the handler.
1292 */
1293 xmlRegisterCharEncodingHandler(handler);
1294#ifdef DEBUG_ENCODING
1295 xmlGenericError(xmlGenericErrorContext,
1296 "Registered encoding handler for %s\n", name);
1297#endif
1298 return(handler);
1299}
1300
1301/**
1302 * xmlInitCharEncodingHandlers:
1303 *
1304 * Initialize the char encoding support, it registers the default
1305 * encoding supported.
1306 * NOTE: while public, this function usually doesn't need to be called
1307 * in normal processing.
1308 */
1309void
1310xmlInitCharEncodingHandlers(void) {
1311 unsigned short int tst = 0x1234;
1312 unsigned char *ptr = (unsigned char *) &tst;
1313
1314 if (handlers != NULL) return;
1315
1316 handlers = (xmlCharEncodingHandlerPtr *)
1317 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1318
1319 if (*ptr == 0x12) xmlLittleEndian = 0;
1320 else if (*ptr == 0x34) xmlLittleEndian = 1;
1321 else xmlGenericError(xmlGenericErrorContext,
1322 "Odd problem at endianness detection\n");
1323
1324 if (handlers == NULL) {
1325 xmlGenericError(xmlGenericErrorContext,
1326 "xmlInitCharEncodingHandlers : out of memory !\n");
1327 return;
1328 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001329 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001330#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001331 xmlUTF16LEHandler =
1332 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1333 xmlUTF16BEHandler =
1334 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001335 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001336 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1337 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001338 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001339#ifdef LIBXML_HTML_ENABLED
1340 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1341#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001342#else
1343 xmlUTF16LEHandler =
1344 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1345 xmlUTF16BEHandler =
1346 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001347 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001348 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1349 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1350 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1351#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001352#ifndef LIBXML_ICONV_ENABLED
1353#ifdef LIBXML_ISO8859X_ENABLED
1354 xmlRegisterCharEncodingHandlersISO8859x ();
1355#endif
1356#endif
1357
Owen Taylor3473f882001-02-23 17:55:21 +00001358}
1359
1360/**
1361 * xmlCleanupCharEncodingHandlers:
1362 *
1363 * Cleanup the memory allocated for the char encoding support, it
1364 * unregisters all the encoding handlers and the aliases.
1365 */
1366void
1367xmlCleanupCharEncodingHandlers(void) {
1368 xmlCleanupEncodingAliases();
1369
1370 if (handlers == NULL) return;
1371
1372 for (;nbCharEncodingHandler > 0;) {
1373 nbCharEncodingHandler--;
1374 if (handlers[nbCharEncodingHandler] != NULL) {
1375 if (handlers[nbCharEncodingHandler]->name != NULL)
1376 xmlFree(handlers[nbCharEncodingHandler]->name);
1377 xmlFree(handlers[nbCharEncodingHandler]);
1378 }
1379 }
1380 xmlFree(handlers);
1381 handlers = NULL;
1382 nbCharEncodingHandler = 0;
1383 xmlDefaultCharEncodingHandler = NULL;
1384}
1385
1386/**
1387 * xmlRegisterCharEncodingHandler:
1388 * @handler: the xmlCharEncodingHandlerPtr handler block
1389 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001390 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001391 */
1392void
1393xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1394 if (handlers == NULL) xmlInitCharEncodingHandlers();
1395 if (handler == NULL) {
1396 xmlGenericError(xmlGenericErrorContext,
1397 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1398 return;
1399 }
1400
1401 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1402 xmlGenericError(xmlGenericErrorContext,
1403 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1404 xmlGenericError(xmlGenericErrorContext,
1405 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1406 return;
1407 }
1408 handlers[nbCharEncodingHandler++] = handler;
1409}
1410
1411/**
1412 * xmlGetCharEncodingHandler:
1413 * @enc: an xmlCharEncoding value.
1414 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001415 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001416 *
1417 * Returns the handler or NULL if not found
1418 */
1419xmlCharEncodingHandlerPtr
1420xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1421 xmlCharEncodingHandlerPtr handler;
1422
1423 if (handlers == NULL) xmlInitCharEncodingHandlers();
1424 switch (enc) {
1425 case XML_CHAR_ENCODING_ERROR:
1426 return(NULL);
1427 case XML_CHAR_ENCODING_NONE:
1428 return(NULL);
1429 case XML_CHAR_ENCODING_UTF8:
1430 return(NULL);
1431 case XML_CHAR_ENCODING_UTF16LE:
1432 return(xmlUTF16LEHandler);
1433 case XML_CHAR_ENCODING_UTF16BE:
1434 return(xmlUTF16BEHandler);
1435 case XML_CHAR_ENCODING_EBCDIC:
1436 handler = xmlFindCharEncodingHandler("EBCDIC");
1437 if (handler != NULL) return(handler);
1438 handler = xmlFindCharEncodingHandler("ebcdic");
1439 if (handler != NULL) return(handler);
1440 break;
1441 case XML_CHAR_ENCODING_UCS4BE:
1442 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1443 if (handler != NULL) return(handler);
1444 handler = xmlFindCharEncodingHandler("UCS-4");
1445 if (handler != NULL) return(handler);
1446 handler = xmlFindCharEncodingHandler("UCS4");
1447 if (handler != NULL) return(handler);
1448 break;
1449 case XML_CHAR_ENCODING_UCS4LE:
1450 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1451 if (handler != NULL) return(handler);
1452 handler = xmlFindCharEncodingHandler("UCS-4");
1453 if (handler != NULL) return(handler);
1454 handler = xmlFindCharEncodingHandler("UCS4");
1455 if (handler != NULL) return(handler);
1456 break;
1457 case XML_CHAR_ENCODING_UCS4_2143:
1458 break;
1459 case XML_CHAR_ENCODING_UCS4_3412:
1460 break;
1461 case XML_CHAR_ENCODING_UCS2:
1462 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1463 if (handler != NULL) return(handler);
1464 handler = xmlFindCharEncodingHandler("UCS-2");
1465 if (handler != NULL) return(handler);
1466 handler = xmlFindCharEncodingHandler("UCS2");
1467 if (handler != NULL) return(handler);
1468 break;
1469
1470 /*
1471 * We used to keep ISO Latin encodings native in the
1472 * generated data. This led to so many problems that
1473 * this has been removed. One can still change this
1474 * back by registering no-ops encoders for those
1475 */
1476 case XML_CHAR_ENCODING_8859_1:
1477 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1478 if (handler != NULL) return(handler);
1479 break;
1480 case XML_CHAR_ENCODING_8859_2:
1481 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1482 if (handler != NULL) return(handler);
1483 break;
1484 case XML_CHAR_ENCODING_8859_3:
1485 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1486 if (handler != NULL) return(handler);
1487 break;
1488 case XML_CHAR_ENCODING_8859_4:
1489 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1490 if (handler != NULL) return(handler);
1491 break;
1492 case XML_CHAR_ENCODING_8859_5:
1493 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1494 if (handler != NULL) return(handler);
1495 break;
1496 case XML_CHAR_ENCODING_8859_6:
1497 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1498 if (handler != NULL) return(handler);
1499 break;
1500 case XML_CHAR_ENCODING_8859_7:
1501 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1502 if (handler != NULL) return(handler);
1503 break;
1504 case XML_CHAR_ENCODING_8859_8:
1505 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1506 if (handler != NULL) return(handler);
1507 break;
1508 case XML_CHAR_ENCODING_8859_9:
1509 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1510 if (handler != NULL) return(handler);
1511 break;
1512
1513
1514 case XML_CHAR_ENCODING_2022_JP:
1515 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1516 if (handler != NULL) return(handler);
1517 break;
1518 case XML_CHAR_ENCODING_SHIFT_JIS:
1519 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1520 if (handler != NULL) return(handler);
1521 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1522 if (handler != NULL) return(handler);
1523 handler = xmlFindCharEncodingHandler("Shift_JIS");
1524 if (handler != NULL) return(handler);
1525 break;
1526 case XML_CHAR_ENCODING_EUC_JP:
1527 handler = xmlFindCharEncodingHandler("EUC-JP");
1528 if (handler != NULL) return(handler);
1529 break;
1530 default:
1531 break;
1532 }
1533
1534#ifdef DEBUG_ENCODING
1535 xmlGenericError(xmlGenericErrorContext,
1536 "No handler found for encoding %d\n", enc);
1537#endif
1538 return(NULL);
1539}
1540
1541/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001542 * xmlFindCharEncodingHandler:
1543 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001544 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001545 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001546 *
1547 * Returns the handler or NULL if not found
1548 */
1549xmlCharEncodingHandlerPtr
1550xmlFindCharEncodingHandler(const char *name) {
1551 const char *nalias;
1552 const char *norig;
1553 xmlCharEncoding alias;
1554#ifdef LIBXML_ICONV_ENABLED
1555 xmlCharEncodingHandlerPtr enc;
1556 iconv_t icv_in, icv_out;
1557#endif /* LIBXML_ICONV_ENABLED */
1558 char upper[100];
1559 int i;
1560
1561 if (handlers == NULL) xmlInitCharEncodingHandlers();
1562 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1563 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1564
1565 /*
1566 * Do the alias resolution
1567 */
1568 norig = name;
1569 nalias = xmlGetEncodingAlias(name);
1570 if (nalias != NULL)
1571 name = nalias;
1572
1573 /*
1574 * Check first for directly registered encoding names
1575 */
1576 for (i = 0;i < 99;i++) {
1577 upper[i] = toupper(name[i]);
1578 if (upper[i] == 0) break;
1579 }
1580 upper[i] = 0;
1581
1582 for (i = 0;i < nbCharEncodingHandler; i++)
1583 if (!strcmp(upper, handlers[i]->name)) {
1584#ifdef DEBUG_ENCODING
1585 xmlGenericError(xmlGenericErrorContext,
1586 "Found registered handler for encoding %s\n", name);
1587#endif
1588 return(handlers[i]);
1589 }
1590
1591#ifdef LIBXML_ICONV_ENABLED
1592 /* check whether iconv can handle this */
1593 icv_in = iconv_open("UTF-8", name);
1594 icv_out = iconv_open(name, "UTF-8");
1595 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1596 enc = (xmlCharEncodingHandlerPtr)
1597 xmlMalloc(sizeof(xmlCharEncodingHandler));
1598 if (enc == NULL) {
1599 iconv_close(icv_in);
1600 iconv_close(icv_out);
1601 return(NULL);
1602 }
1603 enc->name = xmlMemStrdup(name);
1604 enc->input = NULL;
1605 enc->output = NULL;
1606 enc->iconv_in = icv_in;
1607 enc->iconv_out = icv_out;
1608#ifdef DEBUG_ENCODING
1609 xmlGenericError(xmlGenericErrorContext,
1610 "Found iconv handler for encoding %s\n", name);
1611#endif
1612 return enc;
1613 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1614 xmlGenericError(xmlGenericErrorContext,
1615 "iconv : problems with filters for '%s'\n", name);
1616 }
1617#endif /* LIBXML_ICONV_ENABLED */
1618
1619#ifdef DEBUG_ENCODING
1620 xmlGenericError(xmlGenericErrorContext,
1621 "No handler found for encoding %s\n", name);
1622#endif
1623
1624 /*
1625 * Fallback using the canonical names
1626 */
1627 alias = xmlParseCharEncoding(norig);
1628 if (alias != XML_CHAR_ENCODING_ERROR) {
1629 const char* canon;
1630 canon = xmlGetCharEncodingName(alias);
1631 if ((canon != NULL) && (strcmp(name, canon))) {
1632 return(xmlFindCharEncodingHandler(canon));
1633 }
1634 }
1635
William M. Brackf9415e42003-11-28 09:39:10 +00001636 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001637 return(NULL);
1638}
1639
Daniel Veillard97ac1312001-05-30 19:14:17 +00001640/************************************************************************
1641 * *
1642 * ICONV based generic conversion functions *
1643 * *
1644 ************************************************************************/
1645
Owen Taylor3473f882001-02-23 17:55:21 +00001646#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the arguments is NULL, in which
 *        case @outlen, when available, is set to 0), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or the result can't be represented in the target
 *        encoding), or
 *     -3 if the last bytes of the input can't form a complete character,
 *        or for any other conversion problem.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);

    /*
     * iconv() leaves the remaining byte counts in icv_inlen/icv_outlen;
     * turn them into the consumed/produced counts expected by callers.
     */
    *inlen -= (int) icv_inlen;
    *outlen -= (int) icv_outlen;

    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return -1;
#endif
        /* EINVAL (truncated sequence) and anything else. */
        return -3;
    }
    return 0;
}
1709#endif /* LIBXML_ICONV_ENABLED */
1710
Daniel Veillard97ac1312001-05-30 19:14:17 +00001711/************************************************************************
1712 * *
1713 * The real API used by libxml for on-the-fly conversion *
1714 * *
1715 ************************************************************************/
1716
Owen Taylor3473f882001-02-23 17:55:21 +00001717/**
1718 * xmlCharEncFirstLine:
1719 * @handler: char enconding transformation data structure
1720 * @out: an xmlBuffer for the output.
1721 * @in: an xmlBuffer for the input
1722 *
1723 * Front-end for the encoding handler input function, but handle only
1724 * the very first line, i.e. limit itself to 45 chars.
1725 *
1726 * Returns the number of byte written if success, or
1727 * -1 general error
1728 * -2 if the transcoding fails (for *in is not valid utf8 string or
1729 * the result of transformation can't fit into the encoding we want), or
1730 */
1731int
1732xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1733 xmlBufferPtr in) {
1734 int ret = -2;
1735 int written;
1736 int toconv;
1737
1738 if (handler == NULL) return(-1);
1739 if (out == NULL) return(-1);
1740 if (in == NULL) return(-1);
1741
1742 written = out->size - out->use;
1743 toconv = in->use;
1744 if (toconv * 2 >= written) {
1745 xmlBufferGrow(out, toconv);
1746 written = out->size - out->use - 1;
1747 }
1748
1749 /*
1750 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1751 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001752 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001753 */
1754 written = 45;
1755
1756 if (handler->input != NULL) {
1757 ret = handler->input(&out->content[out->use], &written,
1758 in->content, &toconv);
1759 xmlBufferShrink(in, toconv);
1760 out->use += written;
1761 out->content[out->use] = 0;
1762 }
1763#ifdef LIBXML_ICONV_ENABLED
1764 else if (handler->iconv_in != NULL) {
1765 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1766 &written, in->content, &toconv);
1767 xmlBufferShrink(in, toconv);
1768 out->use += written;
1769 out->content[out->use] = 0;
1770 if (ret == -1) ret = -3;
1771 }
1772#endif /* LIBXML_ICONV_ENABLED */
1773#ifdef DEBUG_ENCODING
1774 switch (ret) {
1775 case 0:
1776 xmlGenericError(xmlGenericErrorContext,
1777 "converted %d bytes to %d bytes of input\n",
1778 toconv, written);
1779 break;
1780 case -1:
1781 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1782 toconv, written, in->use);
1783 break;
1784 case -2:
1785 xmlGenericError(xmlGenericErrorContext,
1786 "input conversion failed due to input error\n");
1787 break;
1788 case -3:
1789 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1790 toconv, written, in->use);
1791 break;
1792 default:
1793 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1794 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001795#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001796 /*
1797 * Ignore when input buffer is not on a boundary
1798 */
1799 if (ret == -3) ret = 0;
1800 if (ret == -1) ret = 0;
1801 return(ret);
1802}
1803
1804/**
1805 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001806 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001807 * @out: an xmlBuffer for the output.
1808 * @in: an xmlBuffer for the input
1809 *
1810 * Generic front-end for the encoding handler input function
1811 *
1812 * Returns the number of byte written if success, or
1813 * -1 general error
1814 * -2 if the transcoding fails (for *in is not valid utf8 string or
1815 * the result of transformation can't fit into the encoding we want), or
1816 */
1817int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001818xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1819 xmlBufferPtr in)
1820{
Owen Taylor3473f882001-02-23 17:55:21 +00001821 int ret = -2;
1822 int written;
1823 int toconv;
1824
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001825 if (handler == NULL)
1826 return (-1);
1827 if (out == NULL)
1828 return (-1);
1829 if (in == NULL)
1830 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001831
1832 toconv = in->use;
1833 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001834 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 written = out->size - out->use;
1836 if (toconv * 2 >= written) {
1837 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001838 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
1840 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001841 ret = handler->input(&out->content[out->use], &written,
1842 in->content, &toconv);
1843 xmlBufferShrink(in, toconv);
1844 out->use += written;
1845 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001846 }
1847#ifdef LIBXML_ICONV_ENABLED
1848 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001849 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1850 &written, in->content, &toconv);
1851 xmlBufferShrink(in, toconv);
1852 out->use += written;
1853 out->content[out->use] = 0;
1854 if (ret == -1)
1855 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001856 }
1857#endif /* LIBXML_ICONV_ENABLED */
1858 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001859 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001860#ifdef DEBUG_ENCODING
1861 xmlGenericError(xmlGenericErrorContext,
1862 "converted %d bytes to %d bytes of input\n",
1863 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001864#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001865 break;
1866 case -1:
1867#ifdef DEBUG_ENCODING
1868 xmlGenericError(xmlGenericErrorContext,
1869 "converted %d bytes to %d bytes of input, %d left\n",
1870 toconv, written, in->use);
1871#endif
1872 break;
1873 case -3:
1874#ifdef DEBUG_ENCODING
1875 xmlGenericError(xmlGenericErrorContext,
1876 "converted %d bytes to %d bytes of input, %d left\n",
1877 toconv, written, in->use);
1878#endif
1879 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001881 xmlGenericError(xmlGenericErrorContext,
1882 "input conversion failed due to input error\n");
1883 xmlGenericError(xmlGenericErrorContext,
1884 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1885 in->content[0], in->content[1],
1886 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001887 }
1888 /*
1889 * Ignore when input buffer is not on a boundary
1890 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001891 if (ret == -3)
1892 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001893 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001894}
1895
1896/**
1897 * xmlCharEncOutFunc:
1898 * @handler: char enconding transformation data structure
1899 * @out: an xmlBuffer for the output.
1900 * @in: an xmlBuffer for the input
1901 *
1902 * Generic front-end for the encoding handler output function
1903 * a first call with @in == NULL has to be made firs to initiate the
1904 * output in case of non-stateless encoding needing to initiate their
1905 * state or the output (like the BOM in UTF16).
1906 * In case of UTF8 sequence conversion errors for the given encoder,
1907 * the content will be automatically remapped to a CharRef sequence.
1908 *
1909 * Returns the number of byte written if success, or
1910 * -1 general error
1911 * -2 if the transcoding fails (for *in is not valid utf8 string or
1912 * the result of transformation can't fit into the encoding we want), or
1913 */
1914int
1915xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1916 xmlBufferPtr in) {
1917 int ret = -2;
1918 int written;
1919 int writtentot = 0;
1920 int toconv;
1921 int output = 0;
1922
1923 if (handler == NULL) return(-1);
1924 if (out == NULL) return(-1);
1925
1926retry:
1927
1928 written = out->size - out->use;
1929
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001930 if (written > 0)
1931 written--; /* Gennady: count '/0' */
1932
Owen Taylor3473f882001-02-23 17:55:21 +00001933 /*
1934 * First specific handling of in = NULL, i.e. the initialization call
1935 */
1936 if (in == NULL) {
1937 toconv = 0;
1938 if (handler->output != NULL) {
1939 ret = handler->output(&out->content[out->use], &written,
1940 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001941 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001942 out->use += written;
1943 out->content[out->use] = 0;
1944 }
Owen Taylor3473f882001-02-23 17:55:21 +00001945 }
1946#ifdef LIBXML_ICONV_ENABLED
1947 else if (handler->iconv_out != NULL) {
1948 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1949 &written, NULL, &toconv);
1950 out->use += written;
1951 out->content[out->use] = 0;
1952 }
1953#endif /* LIBXML_ICONV_ENABLED */
1954#ifdef DEBUG_ENCODING
1955 xmlGenericError(xmlGenericErrorContext,
1956 "initialized encoder\n");
1957#endif
1958 return(0);
1959 }
1960
1961 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001962 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001963 */
1964 toconv = in->use;
1965 if (toconv == 0)
1966 return(0);
1967 if (toconv * 2 >= written) {
1968 xmlBufferGrow(out, toconv * 2);
1969 written = out->size - out->use - 1;
1970 }
1971 if (handler->output != NULL) {
1972 ret = handler->output(&out->content[out->use], &written,
1973 in->content, &toconv);
1974 xmlBufferShrink(in, toconv);
1975 out->use += written;
1976 writtentot += written;
1977 out->content[out->use] = 0;
1978 }
1979#ifdef LIBXML_ICONV_ENABLED
1980 else if (handler->iconv_out != NULL) {
1981 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1982 &written, in->content, &toconv);
1983 xmlBufferShrink(in, toconv);
1984 out->use += written;
1985 writtentot += written;
1986 out->content[out->use] = 0;
1987 if (ret == -1) {
1988 if (written > 0) {
1989 /*
1990 * Can be a limitation of iconv
1991 */
1992 goto retry;
1993 }
1994 ret = -3;
1995 }
1996 }
1997#endif /* LIBXML_ICONV_ENABLED */
1998 else {
1999 xmlGenericError(xmlGenericErrorContext,
2000 "xmlCharEncOutFunc: no output function !\n");
2001 return(-1);
2002 }
2003
2004 if (ret >= 0) output += ret;
2005
2006 /*
2007 * Attempt to handle error cases
2008 */
2009 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002010 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002011#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002012 xmlGenericError(xmlGenericErrorContext,
2013 "converted %d bytes to %d bytes of output\n",
2014 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002015#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002016 break;
2017 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002018#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002019 xmlGenericError(xmlGenericErrorContext,
2020 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002021#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002022 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002023 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002024#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002025 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2026 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002027#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002028 break;
2029 case -2: {
2030 int len = in->use;
2031 const xmlChar *utf = (const xmlChar *) in->content;
2032 int cur;
2033
2034 cur = xmlGetUTF8Char(utf, &len);
2035 if (cur > 0) {
2036 xmlChar charref[20];
2037
2038#ifdef DEBUG_ENCODING
2039 xmlGenericError(xmlGenericErrorContext,
2040 "handling output conversion error\n");
2041 xmlGenericError(xmlGenericErrorContext,
2042 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2043 in->content[0], in->content[1],
2044 in->content[2], in->content[3]);
2045#endif
2046 /*
2047 * Removes the UTF8 sequence, and replace it by a charref
2048 * and continue the transcoding phase, hoping the error
2049 * did not mangle the encoder state.
2050 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002051 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 xmlBufferShrink(in, len);
2053 xmlBufferAddHead(in, charref, -1);
2054
2055 goto retry;
2056 } else {
2057 xmlGenericError(xmlGenericErrorContext,
2058 "output conversion failed due to conv error\n");
2059 xmlGenericError(xmlGenericErrorContext,
2060 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2061 in->content[0], in->content[1],
2062 in->content[2], in->content[3]);
2063 in->content[0] = ' ';
2064 }
2065 break;
2066 }
2067 }
2068 return(ret);
2069}
2070
2071/**
2072 * xmlCharEncCloseFunc:
2073 * @handler: char enconding transformation data structure
2074 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002075 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002076 *
2077 * Returns 0 if success, or -1 in case of error
2078 */
2079int
2080xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2081 int ret = 0;
2082 if (handler == NULL) return(-1);
2083 if (handler->name == NULL) return(-1);
2084#ifdef LIBXML_ICONV_ENABLED
2085 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002086 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002087 * and the associated icon resources.
2088 */
2089 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2090 if (handler->name != NULL)
2091 xmlFree(handler->name);
2092 handler->name = NULL;
2093 if (handler->iconv_out != NULL) {
2094 if (iconv_close(handler->iconv_out))
2095 ret = -1;
2096 handler->iconv_out = NULL;
2097 }
2098 if (handler->iconv_in != NULL) {
2099 if (iconv_close(handler->iconv_in))
2100 ret = -1;
2101 handler->iconv_in = NULL;
2102 }
2103 xmlFree(handler);
2104 }
2105#endif /* LIBXML_ICONV_ENABLED */
2106#ifdef DEBUG_ENCODING
2107 if (ret)
2108 xmlGenericError(xmlGenericErrorContext,
2109 "failed to close the encoding handler\n");
2110 else
2111 xmlGenericError(xmlGenericErrorContext,
2112 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002113#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002114
Owen Taylor3473f882001-02-23 17:55:21 +00002115 return(ret);
2116}
2117
Daniel Veillard36711902004-02-11 13:25:26 +00002118/**
2119 * xmlByteConsumed:
2120 * @ctxt: an XML parser context
2121 *
2122 * This function provides the current index of the parser relative
2123 * to the start of the current entity. This function is computed in
2124 * bytes from the beginning starting at zero and finishing at the
2125 * size in byte of the file if parsing a file. The function is
2126 * of constant cost if the input is UTF-8 but can be costly if run
2127 * on non-UTF-8 input.
2128 *
2129 * Returns the index in bytes from the beginning of the entity or -1
2130 * in case the index could not be computed.
2131 */
2132long
2133xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2134 xmlParserInputPtr in;
2135
2136 if (ctxt == NULL) return(-1);
2137 in = ctxt->input;
2138 if (in == NULL) return(-1);
2139 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2140 unsigned int unused = 0;
2141 xmlCharEncodingHandler * handler = in->buf->encoder;
2142 /*
2143 * Encoding conversion, compute the number of unused original
2144 * bytes from the input not consumed and substract that from
2145 * the raw consumed value, this is not a cheap operation
2146 */
2147 if (in->end - in->cur > 0) {
2148 static unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002149 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002150 int toconv = in->end - in->cur, written = 32000;
2151
2152 int ret;
2153
2154 if (handler->output != NULL) {
2155 do {
2156 toconv = in->end - cur;
2157 written = 32000;
2158 ret = handler->output(&convbuf[0], &written,
2159 cur, &toconv);
2160 if (ret == -1) return(-1);
2161 unused += written;
2162 cur += toconv;
2163 } while (ret == -2);
2164#ifdef LIBXML_ICONV_ENABLED
2165 } else if (handler->iconv_out != NULL) {
2166 do {
2167 toconv = in->end - cur;
2168 written = 32000;
2169 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2170 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002171 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002172 if (written > 0)
2173 ret = -2;
2174 else
2175 return(-1);
2176 }
2177 unused += written;
2178 cur += toconv;
2179 } while (ret == -2);
2180#endif
2181 } else {
2182 /* could not find a converter */
2183 return(-1);
2184 }
2185 }
2186 if (in->buf->rawconsumed < unused)
2187 return(-1);
2188 return(in->buf->rawconsumed - unused);
2189 }
2190 return(in->consumed + (in->cur - in->base));
2191}
2192
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002193#ifndef LIBXML_ICONV_ENABLED
2194#ifdef LIBXML_ISO8859X_ENABLED
2195
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops when @out is full, in that
 * case the remaining input is simply left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *         fails, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed,
 *     it never includes the sequence on which the transcoding failed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* end of the last sequence which was completely transcoded */
    const unsigned char* processed = in;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* every sequence yields exactly one byte, so one free slot is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            failed = 1;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                failed = 1;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                failed = 1;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64 entries block, second the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                failed = 1;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                failed = 1;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                failed = 1;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                failed = 1;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                failed = 1;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            failed = 1;
            break;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    if (failed)
        return(-2);
    return(*outlen);
}
2312
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points for the bytes 0x80 to 0xFF, an entry
 *                 of 0 marks an undefined code point
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops before the first char whose
 * UTF-8 sequence does not fit in the space left in @out.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    while ((in < inend) && (out < outend)) {
        /* the input byte is only read once it is known to be in range */
        unsigned int c = *in;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;      /* no room for a 2 bytes sequence */
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                if (outend - out < 3)
                    break;      /* no room for a 3 bytes sequence */
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2378
2379
2380/************************************************************************
2381 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2382 ************************************************************************/
2383
/*
 * Unicode code points for the upper half of ISO-8859-2: 128 entries,
 * entry (byte - 0x80) matching the lookup done by ISO8859xToUTF8(),
 * which treats an entry of 0x0000 as an undefined code point.
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is not visible in this part of the file -- confirm.
 */
2384static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2385 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2386 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2387 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2388 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2389 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2390 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2391 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2392 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2393 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2394 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2395 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2396 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2397 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2398 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2399 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2400 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2401};
2402
/*
 * Unicode to ISO-8859-2 table, 48 + 6 * 64 bytes. The layout matches the
 * @xlattable indexing of UTF8ToISO8859x(): bytes 0..31 are indexed by the
 * low 5 bits of a 2 bytes sequence lead, bytes 32..47 by the low 4 bits
 * of a 3 bytes sequence lead, and the 64 bytes blocks starting at offset
 * 48 by the low 6 bits of a trailing byte; a final value of 0 means the
 * char is not in the character set.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is not visible in this part of the file -- confirm.
 */
2403static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2404 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2407 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2408 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2409 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2411 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2412 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2413 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2414 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2415 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2416 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2417 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2419 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2420 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2421 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2423 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2424 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2425 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2426 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2427 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2428 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2429 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2430 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2431};
2432
/*
 * Unicode code points for the upper half of ISO-8859-3: 128 entries,
 * entry (byte - 0x80) matching the lookup done by ISO8859xToUTF8(),
 * which treats an entry of 0x0000 as an undefined code point.
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is not visible in this part of the file -- confirm.
 */
2433static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2434 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2435 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2436 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2437 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2438 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2439 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2440 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2441 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2442 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2443 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2444 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2445 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2446 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2447 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2448 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2449 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2450};
2451
/*
 * Unicode to ISO-8859-3 table, 48 + 7 * 64 bytes. The layout matches the
 * @xlattable indexing of UTF8ToISO8859x(): bytes 0..31 are indexed by the
 * low 5 bits of a 2 bytes sequence lead, bytes 32..47 by the low 4 bits
 * of a 3 bytes sequence lead, and the 64 bytes blocks starting at offset
 * 48 by the low 6 bits of a trailing byte; a final value of 0 means the
 * char is not in the character set.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is not visible in this part of the file -- confirm.
 */
2452static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2453 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2460 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2461 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2462 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2463 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2464 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2465 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2466 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2467 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2469 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2470 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2472 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2480 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2481 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2482 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2483 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2484};
2485
/*
 * Unicode code points for the upper half of ISO-8859-4: 128 entries,
 * entry (byte - 0x80) matching the lookup done by ISO8859xToUTF8(),
 * which treats an entry of 0x0000 as an undefined code point.
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is not visible in this part of the file -- confirm.
 */
2486static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2487 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2488 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2489 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2490 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2491 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2492 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2493 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2494 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2495 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2496 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2497 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2498 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2499 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2500 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2501 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2502 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2503};
2504
/*
 * Unicode to ISO-8859-4 table, 48 + 6 * 64 bytes. The layout matches the
 * @xlattable indexing of UTF8ToISO8859x(): bytes 0..31 are indexed by the
 * low 5 bits of a 2 bytes sequence lead, bytes 32..47 by the low 4 bits
 * of a 3 bytes sequence lead, and the 64 bytes blocks starting at offset
 * 48 by the low 6 bits of a trailing byte; a final value of 0 means the
 * char is not in the character set.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is not visible in this part of the file -- confirm.
 */
2505static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2506 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2507 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2513 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2514 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2515 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2516 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2517 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2518 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2519 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2520 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2521 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2522 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2523 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2524 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2525 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2529 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2530 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2531 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2532 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2533};
2534
/*
 * Unicode code points for the upper half of ISO-8859-5: 128 entries,
 * entry (byte - 0x80) matching the lookup done by ISO8859xToUTF8(),
 * which treats an entry of 0x0000 as an undefined code point.
 * NOTE(review): the place where this table is handed to ISO8859xToUTF8()
 * is not visible in this part of the file -- confirm.
 */
2535static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2536 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2537 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2538 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2539 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2540 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2541 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2542 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2543 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2544 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2545 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2546 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2547 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2548 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2549 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2550 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2551 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2552};
2553
/*
 * Unicode to ISO-8859-5 table, 48 + 6 * 64 bytes. The layout matches the
 * @xlattable indexing of UTF8ToISO8859x(): bytes 0..31 are indexed by the
 * low 5 bits of a 2 bytes sequence lead, bytes 32..47 by the low 4 bits
 * of a 3 bytes sequence lead, and the 64 bytes blocks starting at offset
 * 48 by the low 6 bits of a trailing byte; a final value of 0 means the
 * char is not in the character set.
 * NOTE(review): the place where this table is handed to UTF8ToISO8859x()
 * is not visible in this part of the file -- confirm.
 */
2554static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2555 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2556 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2557 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2562 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2563 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2564 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2566 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2567 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2568 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2569 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2570 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2571 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2574 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2577 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2579 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2581 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2582};
2583
/*
 * xmlunicodetable_ISO8859_6:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order. Most of the remaining 96 entries are 0x0000; the non-zero ones are
 * 0x00a0, 0x00a4, 0x00ad and values in the range 0x060c..0x0652.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-6, with 0x0000 marking a byte that has no
 * assignment; the code that indexes this table is not visible here -
 * confirm.
 */
2584static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2585 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2586 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2587 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2588 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2589 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2590 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2591 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2592 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2593 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2594 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2595 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2596 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2597 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2598 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2599 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2600 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2601};
2602
/*
 * xmltranscodetable_ISO8859_6:
 *
 * 48 + 5 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * five 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-6 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 *
 * NOTE(review): the very first byte is 0x02 and the only 0xff in the table
 * sits at byte offset 176 (= 48 + 2 * 64). That looks like the 0x0000
 * placeholders of xmlunicodetable_ISO8859_6 having been treated as a real
 * code point when this table was produced - confirm whether it is intended.
 */
2603static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2604 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2605 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2606 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2607 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2611 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2612 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2613 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2615 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2617 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2619 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2621 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2622 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2623 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2624 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2627};
2628
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order. The remaining entries are a mix of 0x00xx values, 0x2015, 0x2018,
 * 0x2019 and values in 0x0384..0x03ce; six entries (36, 37, 42, 46, 82 and
 * 127) are 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-7, with 0x0000 marking a byte that has no
 * assignment; the code that indexes this table is not visible here -
 * confirm.
 */
2629static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2630 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2631 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2632 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2633 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2634 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2635 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2636 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2637 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2638 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2639 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2640 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2641 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2642 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2643 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2644 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2645 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2646};
2647
/*
 * xmltranscodetable_ISO8859_7:
 *
 * 48 + 7 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-7 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 *
 * NOTE(review): the very first byte is 0x04 and the only 0xff in the table
 * sits at byte offset 304 (= 48 + 4 * 64). That looks like the 0x0000
 * placeholders of xmlunicodetable_ISO8859_7 having been treated as a real
 * code point when this table was produced - confirm whether it is intended.
 */
2648static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2649 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2651 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2656 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2657 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2658 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2659 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2660 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2663 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2665 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2666 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2667 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2668 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2670 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2672 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2673 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2674 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2675 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2676 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680};
2681
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order. The remaining non-zero entries are 0x00xx values (including 0x00d7
 * and 0x00f7), 0x2017, 0x05d0..0x05ea, 0x200e and 0x200f; every other
 * entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-8, with 0x0000 marking a byte that has no
 * assignment; the code that indexes this table is not visible here -
 * confirm.
 */
2682static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2683 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2684 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2685 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2686 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2687 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2688 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2689 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2690 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2691 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2692 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2693 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2694 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2695 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2696 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2697 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2698 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2699};
2700
/*
 * xmltranscodetable_ISO8859_8:
 *
 * 48 + 7 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-8 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 *
 * NOTE(review): the very first byte is 0x02 and the only 0xff in the table
 * sits at byte offset 176 (= 48 + 2 * 64). That looks like the 0x0000
 * placeholders of xmlunicodetable_ISO8859_8 having been treated as a real
 * code point when this table was produced - confirm whether it is intended.
 */
2701static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2702 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2703 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2709 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2710 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2711 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2712 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2713 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2718 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2719 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2720 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2721 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2723 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2726 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2730 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2731 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2732 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2733};
2734
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 unsigned 16-bit values. Entry i equals 0x0080 + i everywhere except
 * six entries: 80 (0x011e), 93 (0x0130), 94 (0x015e), 112 (0x011f),
 * 125 (0x0131) and 126 (0x015f). No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-9; the code that indexes this table is not visible
 * here - confirm.
 */
2735static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2736 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2737 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2738 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2739 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2740 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2741 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2742 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2743 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2744 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2745 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2746 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2747 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2748 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2749 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2750 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2751 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2752};
2753
/*
 * xmltranscodetable_ISO8859_9:
 *
 * 48 + 5 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * five 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-9 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 */
2754static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2755 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2761 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2762 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2763 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2764 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2765 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2766 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2767 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2768 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2769 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2770 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2771 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2772 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2773 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2774 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2775 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2777 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2778};
2779
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order. Entries 32..127 are a mix of 0x00xx and 0x01xx values plus a
 * single 0x2015 (entry 61). No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-10; the code that indexes this table is not visible
 * here - confirm.
 */
2780static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2781 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2782 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2783 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2784 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2785 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2786 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2787 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2788 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2789 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2790 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2791 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2792 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2793 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2794 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2795 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2796 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2797};
2798
/*
 * xmltranscodetable_ISO8859_10:
 *
 * 48 + 7 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-10 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 */
2799static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2800 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2808 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2809 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2810 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2811 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2812 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2813 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2814 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2815 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2817 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2818 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2819 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2820 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2821 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2822 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2823 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2824 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2825 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2826 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2828 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2829 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2830 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2831};
2832
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order and entry 32 is 0x00a0. Entries 33..90 are 0x0e01..0x0e3a, entry 95
 * is 0x0e3f and entries 96..123 are 0x0e40..0x0e5b; the remaining eight
 * entries (91..94 and 124..127) are 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-11, with 0x0000 marking a byte that has no
 * assignment; the code that indexes this table is not visible here -
 * confirm.
 */
2833static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2834 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2835 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2836 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2837 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2838 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2839 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2840 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2841 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2842 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2843 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2844 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2845 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2846 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2847 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2848 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2849 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2850};
2851
/*
 * xmltranscodetable_ISO8859_11:
 *
 * 48 + 6 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-11 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 *
 * NOTE(review): the very first byte is 0x04 and the only 0xff in the table
 * sits at byte offset 304 (= 48 + 4 * 64). That looks like the 0x0000
 * placeholders of xmlunicodetable_ISO8859_11 having been treated as a real
 * code point when this table was produced - confirm whether it is intended.
 */
2852static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2853 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2861 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2862 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2866 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2867 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2868 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2869 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2870 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2871 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2872 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2873 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2874 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2875 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2876 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2877 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2878 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880};
2881
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order. Entries 32..127 are a mix of 0x00xx and 0x01xx values plus
 * 0x201d, 0x201e, 0x201c and 0x2019. No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-13; the code that indexes this table is not visible
 * here - confirm.
 */
2882static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2883 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2884 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2885 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2886 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2887 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2888 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2889 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2890 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2891 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2892 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2893 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2894 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2895 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2896 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2897 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2898 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2899};
2900
/*
 * xmltranscodetable_ISO8859_13:
 *
 * 48 + 7 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-13 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 */
2901static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2902 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2904 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2910 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2911 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2912 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2913 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2919 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2920 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2921 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2922 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2923 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2924 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2925 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2926 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2927 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2928 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2929 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2930 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2931 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2932 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2933};
2934
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 unsigned 16-bit values. Entries 0..31 are 0x0080..0x009f in ascending
 * order. Entries 32..127 are a mix of 0x00xx, 0x01xx and 0x1exx values.
 * No entry is 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-14; the code that indexes this table is not visible
 * here - confirm.
 */
2935static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2936 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2937 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2938 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2939 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2940 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2941 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2942 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2943 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2944 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2945 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2946 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2947 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2948 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2949 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2950 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2951 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2952};
2953
/*
 * xmltranscodetable_ISO8859_14:
 *
 * 48 + 10 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-14 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 */
2954static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2955 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2963 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2964 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2965 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
2970 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
2971 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
2972 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2973 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2974 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
2990 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2995 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
2996 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2997 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
2998};
2999
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 unsigned 16-bit values. Entry i equals 0x0080 + i everywhere except
 * eight entries: 36 (0x20ac), 38 (0x0160), 40 (0x0161), 52 (0x017d),
 * 56 (0x017e), 60 (0x0152), 61 (0x0153) and 62 (0x0178). No entry is
 * 0x0000.
 *
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i of ISO-8859-15; the code that indexes this table is not visible
 * here - confirm.
 */
3000static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3001 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3002 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3003 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3004 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3005 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3006 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3007 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3008 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3009 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3010 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3011 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3012 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3013 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3014 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3015 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3016 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3017};
3018
/*
 * xmltranscodetable_ISO8859_15:
 *
 * 48 + 6 * 64 bytes of lookup data. The initializer is a run of adjacent
 * string literals, 16 bytes each; the array bound matches the byte count
 * exactly, so no terminating NUL is stored (valid in C, not in C++).
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks, presumably used to map UTF-8 sequences back to
 * ISO-8859-15 bytes with 0x00 meaning "no mapping". The consumer is not
 * visible here - confirm before editing any byte.
 */
3019static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3020 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3021 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3028 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3029 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3030 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3031 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3038 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3039 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3040 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3041 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3043 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3044 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3045 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3046 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3047};
3048
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values that ISO8859_16ToUTF8() hands to ISO8859xToUTF8().
 * The first 32 entries are 0x0080..0x009f in ascending order.
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8(), which is
 * not visible in this part of the file.
 */
3049static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3050 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3051 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3052 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3053 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3054 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3055 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3056 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3057 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3058 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3059 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3060 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3061 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3062 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3063 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3064 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3065 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3066};
3067
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte lookup table that UTF8ToISO8859_16() hands to UTF8ToISO8859x().
 * The array is declared with 48 + 9 * 64 (= 624) bytes and is initialised
 * from adjacent string literals of 16 escaped bytes each; the literals
 * appear to fill the array exactly, so no terminating NUL is stored
 * (legal in C, where a string literal may initialise a char array that
 * has no room for the NUL).
 *
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by nine 64-byte blocks, with zero entries meaning "no mapping" -- confirm
 * against UTF8ToISO8859x(), which is not visible in this part of the file.
 */
3068static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3069 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3077 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3078 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3079 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3080 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3081 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3082 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3083 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3084 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3085 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3086 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3088 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3098 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3102 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3105 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3106 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3107 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3108};
3109
3110
3111/*
3112 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3113 */
3114
3115static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3116 const unsigned char* in, int *inlen) {
3117 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3118}
3119static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3120 const unsigned char* in, int *inlen) {
3121 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3122}
3123
3124static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3125 const unsigned char* in, int *inlen) {
3126 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3127}
3128static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3129 const unsigned char* in, int *inlen) {
3130 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3131}
3132
3133static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3134 const unsigned char* in, int *inlen) {
3135 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3136}
3137static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3138 const unsigned char* in, int *inlen) {
3139 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3140}
3141
3142static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3143 const unsigned char* in, int *inlen) {
3144 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3145}
3146static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3147 const unsigned char* in, int *inlen) {
3148 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3149}
3150
3151static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3152 const unsigned char* in, int *inlen) {
3153 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3154}
3155static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3156 const unsigned char* in, int *inlen) {
3157 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3158}
3159
3160static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3161 const unsigned char* in, int *inlen) {
3162 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3163}
3164static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3165 const unsigned char* in, int *inlen) {
3166 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3167}
3168
3169static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3170 const unsigned char* in, int *inlen) {
3171 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3172}
3173static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3174 const unsigned char* in, int *inlen) {
3175 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3176}
3177
3178static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3179 const unsigned char* in, int *inlen) {
3180 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3181}
3182static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3183 const unsigned char* in, int *inlen) {
3184 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3185}
3186
3187static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3188 const unsigned char* in, int *inlen) {
3189 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3190}
3191static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3192 const unsigned char* in, int *inlen) {
3193 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3194}
3195
3196static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3197 const unsigned char* in, int *inlen) {
3198 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3199}
3200static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3201 const unsigned char* in, int *inlen) {
3202 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3203}
3204
3205static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3206 const unsigned char* in, int *inlen) {
3207 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3208}
3209static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3210 const unsigned char* in, int *inlen) {
3211 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3212}
3213
3214static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3215 const unsigned char* in, int *inlen) {
3216 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3217}
3218static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3219 const unsigned char* in, int *inlen) {
3220 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3221}
3222
3223static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3224 const unsigned char* in, int *inlen) {
3225 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3226}
3227static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3228 const unsigned char* in, int *inlen) {
3229 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3230}
3231
3232static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3233 const unsigned char* in, int *inlen) {
3234 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3235}
3236static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3237 const unsigned char* in, int *inlen) {
3238 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3239}
3240
3241static void
3242xmlRegisterCharEncodingHandlersISO8859x (void) {
3243 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3244 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3245 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3246 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3247 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3248 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3249 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3250 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3251 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3252 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3253 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3254 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3255 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3256 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3257}
3258
3259#endif
3260#endif
3261
3262