blob: 8d193a9a7e5515ee70e132ec65adf7e55e85bbf2 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 character codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
50typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52struct _xmlCharEncodingAlias {
53 const char *name;
54 const char *alias;
55};
56
57static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58static int xmlCharEncodingAliasesNb = 0;
59static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
71static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a subset of UTF-8, so every accepted
 * input byte is copied unchanged.
 *
 * Returns the number of bytes written if success, 0 if @in is NULL,
 *     or -1 if @out, @outlen or @inlen is NULL or if a byte outside
 *     the 7-bit ASCII range is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * nothing to convert
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    /*
     * Conversion stops as soon as fewer than 6 bytes are left in the
     * output buffer; the remaining input is simply not consumed.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = *in;
        if (c >= 0x80) {
            /* not an ASCII char: report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = c;
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, 0 if @in is NULL,
 *     -2 if the transcoding fails (malformed UTF-8 or a character
 *     outside the ASCII range), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        if (inend - in < trailing) {
            /* the sequence is not complete yet, wait for more input */
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte which is not a continuation byte makes the whole
             * sequence invalid: fail instead of emitting a half
             * decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000215#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000216
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * expanded to a two byte sequence.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    unsigned char *dstlimit;
    const unsigned char *src = in;
    const unsigned char *srclimit;
    const unsigned char *runlimit;
    unsigned char ch;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstlimit = out + *outlen;
    srclimit = in + (*inlen);
    runlimit = srclimit;

    /* a high byte needs two output bytes, hence the "- 1" */
    while ((src < srclimit) && (dst < dstlimit - 1)) {
        ch = *src;
        if (ch >= 0x80) {
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
            src++;
        }
        /* never let a run of plain bytes overflow the output */
        if (runlimit - src > dstlimit - dst)
            runlimit = src + (dstlimit - dst);
        for ( ; (src < runlimit) && (*src < 0x80); src++)
            *dst++ = *src;
    }
    /* one last plain byte may still fit in the final output slot */
    if ((src < srclimit) && (dst < dstlimit) && (*src < 0x80))
        *dst++ = *src++;

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
265
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit are
 * copied from @inb to @out, without any inspection of the content.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL
 *     or if the smaller of the two lengths is negative.
 * The values of *outlen and *inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* the amount copied is bounded by both buffers */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}

301
Daniel Veillarde72c7562002-05-31 09:47:30 +0000302
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000303#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, 0 if @in is NULL,
 *     -2 if the transcoding fails (malformed UTF-8 or a character above
 *     0xFF), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *done = in;
    const unsigned char *srclimit;
    unsigned char *dst = out;
    const unsigned char *dstlimit;
    unsigned int lead, next, value;
    int extra;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    srclimit = in + (*inlen);
    dstlimit = out + (*outlen);

    while (src < srclimit) {
        lead = *src++;
        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /*
             * either a trailing byte in leading position or a lead
             * byte which has no chance to fit in IsoLat1
             */
            status = -2;
            goto finish;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* incomplete sequence: keep it for the next call */
        if (srclimit - src < extra)
            break;

        while (extra-- > 0) {
            next = *src++;
            if ((next & 0xC0) != 0x80) {
                status = -2;
                goto finish;
            }
            value = (value << 6) | (next & 0x3F);
        }

        /* assertion: value is a single UCS-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            goto finish;
        }
        if (dst >= dstlimit)
            break;
        *dst++ = value;
        done = src;
    }

finish:
    *outlen = dst - out;
    *inlen = done - in;
    return((status != 0) ? status : *outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000392#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000393
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte per byte, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @inb.
 *
 * Returns the number of bytes written, 0 if @inb is NULL, -1 if @out,
 *     @outlen or @inlenb is NULL, or -2 if the transcoding fails (a
 *     high surrogate is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed;
 * a trailing odd byte or a surrogate pair cut at the end of @inb is
 * left unconsumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        /*
         * nothing to convert
         */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* only whole 16 bit units can be converted */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /*
     * Conversion stops as soon as fewer than 6 bytes are left in the
     * output buffer, so the up to 4 bytes of a character always fit.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = in[0] | (((unsigned int) in[1]) << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* the pair is split, wait for the second half */
                break;
            }
            d = in[0] | (((unsigned int) in[1]) << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) { *out++ =  c;                        bits = -6; }
        else if (c <   0x800) { *out++ = ((c >>  6) & 0x1F) | 0xC0; bits =  0; }
        else if (c < 0x10000) { *out++ = ((c >> 12) & 0x0F) | 0xE0; bits =  6; }
        else                  { *out++ = ((c >> 18) & 0x07) | 0xF0; bits = 12; }

        for ( ; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
481
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000482#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is stored byte per byte, so the result
 * depends neither on the byte order of the host nor on the alignment
 * of @outb.
 *
 * Returns the number of bytes written, 0 if @in is NULL, -1 if @outb,
 *     @outlen or @inlen is NULL, or -2 if the transcoding failed
 *     (malformed UTF-8 or a value above 0x10FFFF).
 * The value of *inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* *inlen may only be read once it is known to be non NULL */
    inend = in + *inlen;
    /* only whole 16 bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        if (inend - in < trailing) {
            /* the sequence is not complete yet, wait for more input */
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte is a malformed sequence */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) ((c >> 8) & 0xFF);
            out += 2;
        }
        else if (c < 0x110000) {
            unsigned int high, low;

            /* a surrogate pair needs two 16 bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high & 0xFF);
            out[1] = (unsigned char) (high >> 8);
            out[2] = (unsigned char) (low & 0xFF);
            out[3] = (unsigned char) (low >> 8);
            out += 4;
        }
        else {
            /* beyond the range UTF-16 is able to represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
590
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. When @in is NULL the call is an initialization
 * request and the little endian Byte Order Mark (FF FE) is written,
 * provided @outb holds at least 2 bytes; otherwise the data is handed
 * over to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or whatever UTF8ToUTF16LE() returns for actual data.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* both pointers are dereferenced below, even when @in is NULL */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        /* no room for the BOM: nothing is written */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
William M. Brack030a7a12004-02-10 12:48:57 +0000629#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000630
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte per byte, so the
 * result depends neither on the byte order of the host nor on the
 * alignment of @inb.
 *
 * Returns the number of bytes written, 0 if @inb is NULL, -1 if @out,
 *     @outlen or @inlenb is NULL, or -2 if the transcoding fails (a
 *     high surrogate is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed;
 * a trailing odd byte or a surrogate pair cut at the end of @inb is
 * left unconsumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        /*
         * nothing to convert
         */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* only whole 16 bit units can be converted */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /*
     * Conversion stops as soon as fewer than 6 bytes are left in the
     * output buffer, as UTF16LEToUTF8() does. Without that margin a
     * multi-byte character could be cut in the middle while its input
     * would still be reported as consumed.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (((unsigned int) in[0]) << 8) | in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* the pair is split, wait for the second half */
                break;
            }
            d = (((unsigned int) in[0]) << 8) | in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) { *out++ =  c;                        bits = -6; }
        else if (c <   0x800) { *out++ = ((c >>  6) & 0x1F) | 0xC0; bits =  0; }
        else if (c < 0x10000) { *out++ = ((c >> 12) & 0x0F) | 0xE0; bits =  6; }
        else                  { *out++ = ((c >> 18) & 0x07) | 0xF0; bits = 12; }

        for ( ; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
722
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000723#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is stored byte per byte, so the result
 * depends neither on the byte order of the host nor on the alignment
 * of @outb.
 *
 * Returns the number of bytes written, 0 if @in is NULL, -1 if @outb,
 *     @outlen or @inlen is NULL, or -2 if the transcoding failed
 *     (malformed UTF-8 or a value above 0x10FFFF).
 * The value of *inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of *outlen after return is the number of octets produced,
 * on the error paths too.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* *inlen may only be read once it is known to be non NULL */
    inend = in + *inlen;
    /* only whole 16 bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        if (inend - in < trailing) {
            /* the sequence is not complete yet, wait for more input */
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte is a malformed sequence */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) ((c >> 8) & 0xFF);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        }
        else if (c < 0x110000) {
            unsigned int high, low;

            /* a surrogate pair needs two 16 bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high >> 8);
            out[1] = (unsigned char) (high & 0xFF);
            out[2] = (unsigned char) (low >> 8);
            out[3] = (unsigned char) (low & 0xFF);
            out += 4;
        }
        else {
            /* beyond the range UTF-16 is able to represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* the count is given in bytes here as well */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000828#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000829
Daniel Veillard97ac1312001-05-30 19:14:17 +0000830/************************************************************************
831 * *
832 * Generic encoding handling routines *
833 * *
834 ************************************************************************/
835
Owen Taylor3473f882001-02-23 17:55:21 +0000836/**
837 * xmlDetectCharEncoding:
838 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000839 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000840 * @len: pointer to the length of the buffer
841 *
842 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000843 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000844 *
845 * Returns one of the XML_CHAR_ENCODING_... values.
846 */
847xmlCharEncoding
848xmlDetectCharEncoding(const unsigned char* in, int len)
849{
Daniel Veillardce682bc2004-11-05 17:22:25 +0000850 if (in == NULL)
851 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000852 if (len >= 4) {
853 if ((in[0] == 0x00) && (in[1] == 0x00) &&
854 (in[2] == 0x00) && (in[3] == 0x3C))
855 return(XML_CHAR_ENCODING_UCS4BE);
856 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
857 (in[2] == 0x00) && (in[3] == 0x00))
858 return(XML_CHAR_ENCODING_UCS4LE);
859 if ((in[0] == 0x00) && (in[1] == 0x00) &&
860 (in[2] == 0x3C) && (in[3] == 0x00))
861 return(XML_CHAR_ENCODING_UCS4_2143);
862 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
863 (in[2] == 0x00) && (in[3] == 0x00))
864 return(XML_CHAR_ENCODING_UCS4_3412);
865 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
866 (in[2] == 0xA7) && (in[3] == 0x94))
867 return(XML_CHAR_ENCODING_EBCDIC);
868 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
869 (in[2] == 0x78) && (in[3] == 0x6D))
870 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000871 /*
872 * Although not part of the recommendation, we also
873 * attempt an "auto-recognition" of UTF-16LE and
874 * UTF-16BE encodings.
875 */
876 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
877 (in[2] == 0x3F) && (in[3] == 0x00))
878 return(XML_CHAR_ENCODING_UTF16LE);
879 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
880 (in[2] == 0x00) && (in[3] == 0x3F))
881 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000882 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000883 if (len >= 3) {
884 /*
885 * Errata on XML-1.0 June 20 2001
886 * We now allow an UTF8 encoded BOM
887 */
888 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
889 (in[2] == 0xBF))
890 return(XML_CHAR_ENCODING_UTF8);
891 }
William M. Brackf9415e42003-11-28 09:39:10 +0000892 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000893 if (len >= 2) {
894 if ((in[0] == 0xFE) && (in[1] == 0xFF))
895 return(XML_CHAR_ENCODING_UTF16BE);
896 if ((in[0] == 0xFF) && (in[1] == 0xFE))
897 return(XML_CHAR_ENCODING_UTF16LE);
898 }
899 return(XML_CHAR_ENCODING_NONE);
900}
901
902/**
903 * xmlCleanupEncodingAliases:
904 *
905 * Unregisters all aliases
906 */
907void
908xmlCleanupEncodingAliases(void) {
909 int i;
910
911 if (xmlCharEncodingAliases == NULL)
912 return;
913
914 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
915 if (xmlCharEncodingAliases[i].name != NULL)
916 xmlFree((char *) xmlCharEncodingAliases[i].name);
917 if (xmlCharEncodingAliases[i].alias != NULL)
918 xmlFree((char *) xmlCharEncodingAliases[i].alias);
919 }
920 xmlCharEncodingAliasesNb = 0;
921 xmlCharEncodingAliasesMax = 0;
922 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000923 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000924}
925
926/**
927 * xmlGetEncodingAlias:
928 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
929 *
930 * Lookup an encoding name for the given alias.
931 *
William M. Brackf9415e42003-11-28 09:39:10 +0000932 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000933 */
934const char *
935xmlGetEncodingAlias(const char *alias) {
936 int i;
937 char upper[100];
938
939 if (alias == NULL)
940 return(NULL);
941
942 if (xmlCharEncodingAliases == NULL)
943 return(NULL);
944
945 for (i = 0;i < 99;i++) {
946 upper[i] = toupper(alias[i]);
947 if (upper[i] == 0) break;
948 }
949 upper[i] = 0;
950
951 /*
952 * Walk down the list looking for a definition of the alias
953 */
954 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
955 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
956 return(xmlCharEncodingAliases[i].name);
957 }
958 }
959 return(NULL);
960}
961
962/**
963 * xmlAddEncodingAlias:
964 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
965 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
966 *
William M. Brackf9415e42003-11-28 09:39:10 +0000967 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000968 * will be overwritten.
969 *
970 * Returns 0 in case of success, -1 in case of error
971 */
972int
973xmlAddEncodingAlias(const char *name, const char *alias) {
974 int i;
975 char upper[100];
976
977 if ((name == NULL) || (alias == NULL))
978 return(-1);
979
980 for (i = 0;i < 99;i++) {
981 upper[i] = toupper(alias[i]);
982 if (upper[i] == 0) break;
983 }
984 upper[i] = 0;
985
986 if (xmlCharEncodingAliases == NULL) {
987 xmlCharEncodingAliasesNb = 0;
988 xmlCharEncodingAliasesMax = 20;
989 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
990 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
991 if (xmlCharEncodingAliases == NULL)
992 return(-1);
993 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
994 xmlCharEncodingAliasesMax *= 2;
995 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
996 xmlRealloc(xmlCharEncodingAliases,
997 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
998 }
999 /*
1000 * Walk down the list looking for a definition of the alias
1001 */
1002 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1003 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1004 /*
1005 * Replace the definition.
1006 */
1007 xmlFree((char *) xmlCharEncodingAliases[i].name);
1008 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1009 return(0);
1010 }
1011 }
1012 /*
1013 * Add the definition
1014 */
1015 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1016 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1017 xmlCharEncodingAliasesNb++;
1018 return(0);
1019}
1020
1021/**
1022 * xmlDelEncodingAlias:
1023 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1024 *
1025 * Unregisters an encoding alias @alias
1026 *
1027 * Returns 0 in case of success, -1 in case of error
1028 */
1029int
1030xmlDelEncodingAlias(const char *alias) {
1031 int i;
1032
1033 if (alias == NULL)
1034 return(-1);
1035
1036 if (xmlCharEncodingAliases == NULL)
1037 return(-1);
1038 /*
1039 * Walk down the list looking for a definition of the alias
1040 */
1041 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1042 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1043 xmlFree((char *) xmlCharEncodingAliases[i].name);
1044 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1045 xmlCharEncodingAliasesNb--;
1046 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1047 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1048 return(0);
1049 }
1050 }
1051 return(-1);
1052}
1053
1054/**
1055 * xmlParseCharEncoding:
1056 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1057 *
William M. Brackf9415e42003-11-28 09:39:10 +00001058 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001059 * that the comparison is case insensitive accordingly to the section
1060 * [XML] 4.3.3 Character Encoding in Entities.
1061 *
1062 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1063 * if not recognized.
1064 */
1065xmlCharEncoding
1066xmlParseCharEncoding(const char* name)
1067{
1068 const char *alias;
1069 char upper[500];
1070 int i;
1071
1072 if (name == NULL)
1073 return(XML_CHAR_ENCODING_NONE);
1074
1075 /*
1076 * Do the alias resolution
1077 */
1078 alias = xmlGetEncodingAlias(name);
1079 if (alias != NULL)
1080 name = alias;
1081
1082 for (i = 0;i < 499;i++) {
1083 upper[i] = toupper(name[i]);
1084 if (upper[i] == 0) break;
1085 }
1086 upper[i] = 0;
1087
1088 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1089 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1090 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1091
1092 /*
1093 * NOTE: if we were able to parse this, the endianness of UTF16 is
1094 * already found and in use
1095 */
1096 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1097 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1098
1099 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1100 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1101 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1102
1103 /*
1104 * NOTE: if we were able to parse this, the endianness of UCS4 is
1105 * already found and in use
1106 */
1107 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1108 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1109 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1110
1111
1112 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1113 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1114 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1115
1116 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1117 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1118 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1119
1120 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1121 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1122 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1123 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1124 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1125 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1126 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1127
1128 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1129 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1130 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1131
1132#ifdef DEBUG_ENCODING
1133 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1134#endif
1135 return(XML_CHAR_ENCODING_ERROR);
1136}
1137
1138/**
1139 * xmlGetCharEncodingName:
1140 * @enc: the encoding
1141 *
1142 * The "canonical" name for XML encoding.
1143 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1144 * Section 4.3.3 Character Encoding in Entities
1145 *
1146 * Returns the canonical name for the given encoding
1147 */
1148
1149const char*
1150xmlGetCharEncodingName(xmlCharEncoding enc) {
1151 switch (enc) {
1152 case XML_CHAR_ENCODING_ERROR:
1153 return(NULL);
1154 case XML_CHAR_ENCODING_NONE:
1155 return(NULL);
1156 case XML_CHAR_ENCODING_UTF8:
1157 return("UTF-8");
1158 case XML_CHAR_ENCODING_UTF16LE:
1159 return("UTF-16");
1160 case XML_CHAR_ENCODING_UTF16BE:
1161 return("UTF-16");
1162 case XML_CHAR_ENCODING_EBCDIC:
1163 return("EBCDIC");
1164 case XML_CHAR_ENCODING_UCS4LE:
1165 return("ISO-10646-UCS-4");
1166 case XML_CHAR_ENCODING_UCS4BE:
1167 return("ISO-10646-UCS-4");
1168 case XML_CHAR_ENCODING_UCS4_2143:
1169 return("ISO-10646-UCS-4");
1170 case XML_CHAR_ENCODING_UCS4_3412:
1171 return("ISO-10646-UCS-4");
1172 case XML_CHAR_ENCODING_UCS2:
1173 return("ISO-10646-UCS-2");
1174 case XML_CHAR_ENCODING_8859_1:
1175 return("ISO-8859-1");
1176 case XML_CHAR_ENCODING_8859_2:
1177 return("ISO-8859-2");
1178 case XML_CHAR_ENCODING_8859_3:
1179 return("ISO-8859-3");
1180 case XML_CHAR_ENCODING_8859_4:
1181 return("ISO-8859-4");
1182 case XML_CHAR_ENCODING_8859_5:
1183 return("ISO-8859-5");
1184 case XML_CHAR_ENCODING_8859_6:
1185 return("ISO-8859-6");
1186 case XML_CHAR_ENCODING_8859_7:
1187 return("ISO-8859-7");
1188 case XML_CHAR_ENCODING_8859_8:
1189 return("ISO-8859-8");
1190 case XML_CHAR_ENCODING_8859_9:
1191 return("ISO-8859-9");
1192 case XML_CHAR_ENCODING_2022_JP:
1193 return("ISO-2022-JP");
1194 case XML_CHAR_ENCODING_SHIFT_JIS:
1195 return("Shift-JIS");
1196 case XML_CHAR_ENCODING_EUC_JP:
1197 return("EUC-JP");
1198 case XML_CHAR_ENCODING_ASCII:
1199 return(NULL);
1200 }
1201 return(NULL);
1202}
1203
Daniel Veillard97ac1312001-05-30 19:14:17 +00001204/************************************************************************
1205 * *
1206 * Char encoding handlers *
1207 * *
1208 ************************************************************************/
1209
Owen Taylor3473f882001-02-23 17:55:21 +00001210
1211/* the size should be growable, but it's not a big deal ... */
1212#define MAX_ENCODING_HANDLERS 50
1213static xmlCharEncodingHandlerPtr *handlers = NULL;
1214static int nbCharEncodingHandler = 0;
1215
1216/*
1217 * The default is UTF-8 for XML, that's also the default used for the
1218 * parser internals, so the default encoding handler is NULL
1219 */
1220
1221static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1222
1223/**
1224 * xmlNewCharEncodingHandler:
1225 * @name: the encoding name, in UTF-8 format (ASCII actually)
1226 * @input: the xmlCharEncodingInputFunc to read that encoding
1227 * @output: the xmlCharEncodingOutputFunc to write that encoding
1228 *
1229 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001230 *
Owen Taylor3473f882001-02-23 17:55:21 +00001231 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1232 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001233xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001234xmlNewCharEncodingHandler(const char *name,
1235 xmlCharEncodingInputFunc input,
1236 xmlCharEncodingOutputFunc output) {
1237 xmlCharEncodingHandlerPtr handler;
1238 const char *alias;
1239 char upper[500];
1240 int i;
1241 char *up = 0;
1242
1243 /*
1244 * Do the alias resolution
1245 */
1246 alias = xmlGetEncodingAlias(name);
1247 if (alias != NULL)
1248 name = alias;
1249
1250 /*
1251 * Keep only the uppercase version of the encoding.
1252 */
1253 if (name == NULL) {
1254 xmlGenericError(xmlGenericErrorContext,
1255 "xmlNewCharEncodingHandler : no name !\n");
1256 return(NULL);
1257 }
1258 for (i = 0;i < 499;i++) {
1259 upper[i] = toupper(name[i]);
1260 if (upper[i] == 0) break;
1261 }
1262 upper[i] = 0;
1263 up = xmlMemStrdup(upper);
1264 if (up == NULL) {
1265 xmlGenericError(xmlGenericErrorContext,
1266 "xmlNewCharEncodingHandler : out of memory !\n");
1267 return(NULL);
1268 }
1269
1270 /*
1271 * allocate and fill-up an handler block.
1272 */
1273 handler = (xmlCharEncodingHandlerPtr)
1274 xmlMalloc(sizeof(xmlCharEncodingHandler));
1275 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001276 xmlFree(up);
Owen Taylor3473f882001-02-23 17:55:21 +00001277 xmlGenericError(xmlGenericErrorContext,
1278 "xmlNewCharEncodingHandler : out of memory !\n");
1279 return(NULL);
1280 }
1281 handler->input = input;
1282 handler->output = output;
1283 handler->name = up;
1284
1285#ifdef LIBXML_ICONV_ENABLED
1286 handler->iconv_in = NULL;
1287 handler->iconv_out = NULL;
1288#endif /* LIBXML_ICONV_ENABLED */
1289
1290 /*
1291 * registers and returns the handler.
1292 */
1293 xmlRegisterCharEncodingHandler(handler);
1294#ifdef DEBUG_ENCODING
1295 xmlGenericError(xmlGenericErrorContext,
1296 "Registered encoding handler for %s\n", name);
1297#endif
1298 return(handler);
1299}
1300
1301/**
1302 * xmlInitCharEncodingHandlers:
1303 *
1304 * Initialize the char encoding support, it registers the default
1305 * encoding supported.
1306 * NOTE: while public, this function usually doesn't need to be called
1307 * in normal processing.
1308 */
1309void
1310xmlInitCharEncodingHandlers(void) {
1311 unsigned short int tst = 0x1234;
1312 unsigned char *ptr = (unsigned char *) &tst;
1313
1314 if (handlers != NULL) return;
1315
1316 handlers = (xmlCharEncodingHandlerPtr *)
1317 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1318
1319 if (*ptr == 0x12) xmlLittleEndian = 0;
1320 else if (*ptr == 0x34) xmlLittleEndian = 1;
1321 else xmlGenericError(xmlGenericErrorContext,
1322 "Odd problem at endianness detection\n");
1323
1324 if (handlers == NULL) {
1325 xmlGenericError(xmlGenericErrorContext,
1326 "xmlInitCharEncodingHandlers : out of memory !\n");
1327 return;
1328 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001329 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001330#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001331 xmlUTF16LEHandler =
1332 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1333 xmlUTF16BEHandler =
1334 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001335 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001336 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1337 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001338 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001339#ifdef LIBXML_HTML_ENABLED
1340 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1341#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001342#else
1343 xmlUTF16LEHandler =
1344 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1345 xmlUTF16BEHandler =
1346 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001347 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001348 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1349 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1350 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1351#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001352#ifndef LIBXML_ICONV_ENABLED
1353#ifdef LIBXML_ISO8859X_ENABLED
1354 xmlRegisterCharEncodingHandlersISO8859x ();
1355#endif
1356#endif
1357
Owen Taylor3473f882001-02-23 17:55:21 +00001358}
1359
1360/**
1361 * xmlCleanupCharEncodingHandlers:
1362 *
1363 * Cleanup the memory allocated for the char encoding support, it
1364 * unregisters all the encoding handlers and the aliases.
1365 */
1366void
1367xmlCleanupCharEncodingHandlers(void) {
1368 xmlCleanupEncodingAliases();
1369
1370 if (handlers == NULL) return;
1371
1372 for (;nbCharEncodingHandler > 0;) {
1373 nbCharEncodingHandler--;
1374 if (handlers[nbCharEncodingHandler] != NULL) {
1375 if (handlers[nbCharEncodingHandler]->name != NULL)
1376 xmlFree(handlers[nbCharEncodingHandler]->name);
1377 xmlFree(handlers[nbCharEncodingHandler]);
1378 }
1379 }
1380 xmlFree(handlers);
1381 handlers = NULL;
1382 nbCharEncodingHandler = 0;
1383 xmlDefaultCharEncodingHandler = NULL;
1384}
1385
1386/**
1387 * xmlRegisterCharEncodingHandler:
1388 * @handler: the xmlCharEncodingHandlerPtr handler block
1389 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001390 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001391 */
1392void
1393xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1394 if (handlers == NULL) xmlInitCharEncodingHandlers();
1395 if (handler == NULL) {
1396 xmlGenericError(xmlGenericErrorContext,
1397 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1398 return;
1399 }
1400
1401 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1402 xmlGenericError(xmlGenericErrorContext,
1403 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1404 xmlGenericError(xmlGenericErrorContext,
1405 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1406 return;
1407 }
1408 handlers[nbCharEncodingHandler++] = handler;
1409}
1410
1411/**
1412 * xmlGetCharEncodingHandler:
1413 * @enc: an xmlCharEncoding value.
1414 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001415 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001416 *
1417 * Returns the handler or NULL if not found
1418 */
1419xmlCharEncodingHandlerPtr
1420xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1421 xmlCharEncodingHandlerPtr handler;
1422
1423 if (handlers == NULL) xmlInitCharEncodingHandlers();
1424 switch (enc) {
1425 case XML_CHAR_ENCODING_ERROR:
1426 return(NULL);
1427 case XML_CHAR_ENCODING_NONE:
1428 return(NULL);
1429 case XML_CHAR_ENCODING_UTF8:
1430 return(NULL);
1431 case XML_CHAR_ENCODING_UTF16LE:
1432 return(xmlUTF16LEHandler);
1433 case XML_CHAR_ENCODING_UTF16BE:
1434 return(xmlUTF16BEHandler);
1435 case XML_CHAR_ENCODING_EBCDIC:
1436 handler = xmlFindCharEncodingHandler("EBCDIC");
1437 if (handler != NULL) return(handler);
1438 handler = xmlFindCharEncodingHandler("ebcdic");
1439 if (handler != NULL) return(handler);
1440 break;
1441 case XML_CHAR_ENCODING_UCS4BE:
1442 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1443 if (handler != NULL) return(handler);
1444 handler = xmlFindCharEncodingHandler("UCS-4");
1445 if (handler != NULL) return(handler);
1446 handler = xmlFindCharEncodingHandler("UCS4");
1447 if (handler != NULL) return(handler);
1448 break;
1449 case XML_CHAR_ENCODING_UCS4LE:
1450 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1451 if (handler != NULL) return(handler);
1452 handler = xmlFindCharEncodingHandler("UCS-4");
1453 if (handler != NULL) return(handler);
1454 handler = xmlFindCharEncodingHandler("UCS4");
1455 if (handler != NULL) return(handler);
1456 break;
1457 case XML_CHAR_ENCODING_UCS4_2143:
1458 break;
1459 case XML_CHAR_ENCODING_UCS4_3412:
1460 break;
1461 case XML_CHAR_ENCODING_UCS2:
1462 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1463 if (handler != NULL) return(handler);
1464 handler = xmlFindCharEncodingHandler("UCS-2");
1465 if (handler != NULL) return(handler);
1466 handler = xmlFindCharEncodingHandler("UCS2");
1467 if (handler != NULL) return(handler);
1468 break;
1469
1470 /*
1471 * We used to keep ISO Latin encodings native in the
1472 * generated data. This led to so many problems that
1473 * this has been removed. One can still change this
1474 * back by registering no-ops encoders for those
1475 */
1476 case XML_CHAR_ENCODING_8859_1:
1477 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1478 if (handler != NULL) return(handler);
1479 break;
1480 case XML_CHAR_ENCODING_8859_2:
1481 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1482 if (handler != NULL) return(handler);
1483 break;
1484 case XML_CHAR_ENCODING_8859_3:
1485 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1486 if (handler != NULL) return(handler);
1487 break;
1488 case XML_CHAR_ENCODING_8859_4:
1489 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1490 if (handler != NULL) return(handler);
1491 break;
1492 case XML_CHAR_ENCODING_8859_5:
1493 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1494 if (handler != NULL) return(handler);
1495 break;
1496 case XML_CHAR_ENCODING_8859_6:
1497 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1498 if (handler != NULL) return(handler);
1499 break;
1500 case XML_CHAR_ENCODING_8859_7:
1501 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1502 if (handler != NULL) return(handler);
1503 break;
1504 case XML_CHAR_ENCODING_8859_8:
1505 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1506 if (handler != NULL) return(handler);
1507 break;
1508 case XML_CHAR_ENCODING_8859_9:
1509 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1510 if (handler != NULL) return(handler);
1511 break;
1512
1513
1514 case XML_CHAR_ENCODING_2022_JP:
1515 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1516 if (handler != NULL) return(handler);
1517 break;
1518 case XML_CHAR_ENCODING_SHIFT_JIS:
1519 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1520 if (handler != NULL) return(handler);
1521 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1522 if (handler != NULL) return(handler);
1523 handler = xmlFindCharEncodingHandler("Shift_JIS");
1524 if (handler != NULL) return(handler);
1525 break;
1526 case XML_CHAR_ENCODING_EUC_JP:
1527 handler = xmlFindCharEncodingHandler("EUC-JP");
1528 if (handler != NULL) return(handler);
1529 break;
1530 default:
1531 break;
1532 }
1533
1534#ifdef DEBUG_ENCODING
1535 xmlGenericError(xmlGenericErrorContext,
1536 "No handler found for encoding %d\n", enc);
1537#endif
1538 return(NULL);
1539}
1540
1541/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001542 * xmlFindCharEncodingHandler:
1543 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001544 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001545 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001546 *
1547 * Returns the handler or NULL if not found
1548 */
1549xmlCharEncodingHandlerPtr
1550xmlFindCharEncodingHandler(const char *name) {
1551 const char *nalias;
1552 const char *norig;
1553 xmlCharEncoding alias;
1554#ifdef LIBXML_ICONV_ENABLED
1555 xmlCharEncodingHandlerPtr enc;
1556 iconv_t icv_in, icv_out;
1557#endif /* LIBXML_ICONV_ENABLED */
1558 char upper[100];
1559 int i;
1560
1561 if (handlers == NULL) xmlInitCharEncodingHandlers();
1562 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1563 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1564
1565 /*
1566 * Do the alias resolution
1567 */
1568 norig = name;
1569 nalias = xmlGetEncodingAlias(name);
1570 if (nalias != NULL)
1571 name = nalias;
1572
1573 /*
1574 * Check first for directly registered encoding names
1575 */
1576 for (i = 0;i < 99;i++) {
1577 upper[i] = toupper(name[i]);
1578 if (upper[i] == 0) break;
1579 }
1580 upper[i] = 0;
1581
1582 for (i = 0;i < nbCharEncodingHandler; i++)
1583 if (!strcmp(upper, handlers[i]->name)) {
1584#ifdef DEBUG_ENCODING
1585 xmlGenericError(xmlGenericErrorContext,
1586 "Found registered handler for encoding %s\n", name);
1587#endif
1588 return(handlers[i]);
1589 }
1590
1591#ifdef LIBXML_ICONV_ENABLED
1592 /* check whether iconv can handle this */
1593 icv_in = iconv_open("UTF-8", name);
1594 icv_out = iconv_open(name, "UTF-8");
1595 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1596 enc = (xmlCharEncodingHandlerPtr)
1597 xmlMalloc(sizeof(xmlCharEncodingHandler));
1598 if (enc == NULL) {
1599 iconv_close(icv_in);
1600 iconv_close(icv_out);
1601 return(NULL);
1602 }
1603 enc->name = xmlMemStrdup(name);
1604 enc->input = NULL;
1605 enc->output = NULL;
1606 enc->iconv_in = icv_in;
1607 enc->iconv_out = icv_out;
1608#ifdef DEBUG_ENCODING
1609 xmlGenericError(xmlGenericErrorContext,
1610 "Found iconv handler for encoding %s\n", name);
1611#endif
1612 return enc;
1613 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1614 xmlGenericError(xmlGenericErrorContext,
1615 "iconv : problems with filters for '%s'\n", name);
1616 }
1617#endif /* LIBXML_ICONV_ENABLED */
1618
1619#ifdef DEBUG_ENCODING
1620 xmlGenericError(xmlGenericErrorContext,
1621 "No handler found for encoding %s\n", name);
1622#endif
1623
1624 /*
1625 * Fallback using the canonical names
1626 */
1627 alias = xmlParseCharEncoding(norig);
1628 if (alias != XML_CHAR_ENCODING_ERROR) {
1629 const char* canon;
1630 canon = xmlGetCharEncodingName(alias);
1631 if ((canon != NULL) && (strcmp(name, canon))) {
1632 return(xmlFindCharEncodingHandler(canon));
1633 }
1634 }
1635
William M. Brackf9415e42003-11-28 09:39:10 +00001636 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001637 return(NULL);
1638}
1639
Daniel Veillard97ac1312001-05-30 19:14:17 +00001640/************************************************************************
1641 * *
1642 * ICONV based generic conversion functions *
1643 * *
1644 ************************************************************************/
1645
Owen Taylor3473f882001-02-23 17:55:21 +00001646#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or the result of transformation can't fit into the
 *        encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * Unless a pointer argument was NULL, the value of @inlen after return
 * is the number of octets consumed and the value of @outlen after return
 * is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
        return(-1);
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /*
     * iconv() leaves in the length variables what remains to be read
     * and the space still free, turn these into consumed/produced counts.
     */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
1707#endif /* LIBXML_ICONV_ENABLED */
1708
Daniel Veillard97ac1312001-05-30 19:14:17 +00001709/************************************************************************
1710 * *
1711 * The real API used by libxml for on-the-fly conversion *
1712 * *
1713 ************************************************************************/
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715/**
1716 * xmlCharEncFirstLine:
1717 * @handler: char enconding transformation data structure
1718 * @out: an xmlBuffer for the output.
1719 * @in: an xmlBuffer for the input
1720 *
1721 * Front-end for the encoding handler input function, but handle only
1722 * the very first line, i.e. limit itself to 45 chars.
1723 *
1724 * Returns the number of byte written if success, or
1725 * -1 general error
1726 * -2 if the transcoding fails (for *in is not valid utf8 string or
1727 * the result of transformation can't fit into the encoding we want), or
1728 */
1729int
1730xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1731 xmlBufferPtr in) {
1732 int ret = -2;
1733 int written;
1734 int toconv;
1735
1736 if (handler == NULL) return(-1);
1737 if (out == NULL) return(-1);
1738 if (in == NULL) return(-1);
1739
1740 written = out->size - out->use;
1741 toconv = in->use;
1742 if (toconv * 2 >= written) {
1743 xmlBufferGrow(out, toconv);
1744 written = out->size - out->use - 1;
1745 }
1746
1747 /*
1748 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1749 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001750 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001751 */
1752 written = 45;
1753
1754 if (handler->input != NULL) {
1755 ret = handler->input(&out->content[out->use], &written,
1756 in->content, &toconv);
1757 xmlBufferShrink(in, toconv);
1758 out->use += written;
1759 out->content[out->use] = 0;
1760 }
1761#ifdef LIBXML_ICONV_ENABLED
1762 else if (handler->iconv_in != NULL) {
1763 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1764 &written, in->content, &toconv);
1765 xmlBufferShrink(in, toconv);
1766 out->use += written;
1767 out->content[out->use] = 0;
1768 if (ret == -1) ret = -3;
1769 }
1770#endif /* LIBXML_ICONV_ENABLED */
1771#ifdef DEBUG_ENCODING
1772 switch (ret) {
1773 case 0:
1774 xmlGenericError(xmlGenericErrorContext,
1775 "converted %d bytes to %d bytes of input\n",
1776 toconv, written);
1777 break;
1778 case -1:
1779 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1780 toconv, written, in->use);
1781 break;
1782 case -2:
1783 xmlGenericError(xmlGenericErrorContext,
1784 "input conversion failed due to input error\n");
1785 break;
1786 case -3:
1787 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1788 toconv, written, in->use);
1789 break;
1790 default:
1791 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1792 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001793#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001794 /*
1795 * Ignore when input buffer is not on a boundary
1796 */
1797 if (ret == -3) ret = 0;
1798 if (ret == -1) ret = 0;
1799 return(ret);
1800}
1801
1802/**
1803 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001805 * @out: an xmlBuffer for the output.
1806 * @in: an xmlBuffer for the input
1807 *
1808 * Generic front-end for the encoding handler input function
1809 *
1810 * Returns the number of byte written if success, or
1811 * -1 general error
1812 * -2 if the transcoding fails (for *in is not valid utf8 string or
1813 * the result of transformation can't fit into the encoding we want), or
1814 */
1815int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001816xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1817 xmlBufferPtr in)
1818{
Owen Taylor3473f882001-02-23 17:55:21 +00001819 int ret = -2;
1820 int written;
1821 int toconv;
1822
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001823 if (handler == NULL)
1824 return (-1);
1825 if (out == NULL)
1826 return (-1);
1827 if (in == NULL)
1828 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001829
1830 toconv = in->use;
1831 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001832 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001833 written = out->size - out->use;
1834 if (toconv * 2 >= written) {
1835 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001836 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001837 }
1838 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001839 ret = handler->input(&out->content[out->use], &written,
1840 in->content, &toconv);
1841 xmlBufferShrink(in, toconv);
1842 out->use += written;
1843 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001844 }
1845#ifdef LIBXML_ICONV_ENABLED
1846 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001847 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1848 &written, in->content, &toconv);
1849 xmlBufferShrink(in, toconv);
1850 out->use += written;
1851 out->content[out->use] = 0;
1852 if (ret == -1)
1853 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001854 }
1855#endif /* LIBXML_ICONV_ENABLED */
1856 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001857 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001858#ifdef DEBUG_ENCODING
1859 xmlGenericError(xmlGenericErrorContext,
1860 "converted %d bytes to %d bytes of input\n",
1861 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001862#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001863 break;
1864 case -1:
1865#ifdef DEBUG_ENCODING
1866 xmlGenericError(xmlGenericErrorContext,
1867 "converted %d bytes to %d bytes of input, %d left\n",
1868 toconv, written, in->use);
1869#endif
1870 break;
1871 case -3:
1872#ifdef DEBUG_ENCODING
1873 xmlGenericError(xmlGenericErrorContext,
1874 "converted %d bytes to %d bytes of input, %d left\n",
1875 toconv, written, in->use);
1876#endif
1877 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001878 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001879 xmlGenericError(xmlGenericErrorContext,
1880 "input conversion failed due to input error\n");
1881 xmlGenericError(xmlGenericErrorContext,
1882 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1883 in->content[0], in->content[1],
1884 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001885 }
1886 /*
1887 * Ignore when input buffer is not on a boundary
1888 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001889 if (ret == -3)
1890 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001891 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001892}
1893
1894/**
1895 * xmlCharEncOutFunc:
1896 * @handler: char enconding transformation data structure
1897 * @out: an xmlBuffer for the output.
1898 * @in: an xmlBuffer for the input
1899 *
1900 * Generic front-end for the encoding handler output function
1901 * a first call with @in == NULL has to be made firs to initiate the
1902 * output in case of non-stateless encoding needing to initiate their
1903 * state or the output (like the BOM in UTF16).
1904 * In case of UTF8 sequence conversion errors for the given encoder,
1905 * the content will be automatically remapped to a CharRef sequence.
1906 *
1907 * Returns the number of byte written if success, or
1908 * -1 general error
1909 * -2 if the transcoding fails (for *in is not valid utf8 string or
1910 * the result of transformation can't fit into the encoding we want), or
1911 */
1912int
1913xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1914 xmlBufferPtr in) {
1915 int ret = -2;
1916 int written;
1917 int writtentot = 0;
1918 int toconv;
1919 int output = 0;
1920
1921 if (handler == NULL) return(-1);
1922 if (out == NULL) return(-1);
1923
1924retry:
1925
1926 written = out->size - out->use;
1927
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001928 if (written > 0)
1929 written--; /* Gennady: count '/0' */
1930
Owen Taylor3473f882001-02-23 17:55:21 +00001931 /*
1932 * First specific handling of in = NULL, i.e. the initialization call
1933 */
1934 if (in == NULL) {
1935 toconv = 0;
1936 if (handler->output != NULL) {
1937 ret = handler->output(&out->content[out->use], &written,
1938 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001939 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001940 out->use += written;
1941 out->content[out->use] = 0;
1942 }
Owen Taylor3473f882001-02-23 17:55:21 +00001943 }
1944#ifdef LIBXML_ICONV_ENABLED
1945 else if (handler->iconv_out != NULL) {
1946 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1947 &written, NULL, &toconv);
1948 out->use += written;
1949 out->content[out->use] = 0;
1950 }
1951#endif /* LIBXML_ICONV_ENABLED */
1952#ifdef DEBUG_ENCODING
1953 xmlGenericError(xmlGenericErrorContext,
1954 "initialized encoder\n");
1955#endif
1956 return(0);
1957 }
1958
1959 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001960 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001961 */
1962 toconv = in->use;
1963 if (toconv == 0)
1964 return(0);
1965 if (toconv * 2 >= written) {
1966 xmlBufferGrow(out, toconv * 2);
1967 written = out->size - out->use - 1;
1968 }
1969 if (handler->output != NULL) {
1970 ret = handler->output(&out->content[out->use], &written,
1971 in->content, &toconv);
1972 xmlBufferShrink(in, toconv);
1973 out->use += written;
1974 writtentot += written;
1975 out->content[out->use] = 0;
1976 }
1977#ifdef LIBXML_ICONV_ENABLED
1978 else if (handler->iconv_out != NULL) {
1979 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1980 &written, in->content, &toconv);
1981 xmlBufferShrink(in, toconv);
1982 out->use += written;
1983 writtentot += written;
1984 out->content[out->use] = 0;
1985 if (ret == -1) {
1986 if (written > 0) {
1987 /*
1988 * Can be a limitation of iconv
1989 */
1990 goto retry;
1991 }
1992 ret = -3;
1993 }
1994 }
1995#endif /* LIBXML_ICONV_ENABLED */
1996 else {
1997 xmlGenericError(xmlGenericErrorContext,
1998 "xmlCharEncOutFunc: no output function !\n");
1999 return(-1);
2000 }
2001
2002 if (ret >= 0) output += ret;
2003
2004 /*
2005 * Attempt to handle error cases
2006 */
2007 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002008 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002009#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002010 xmlGenericError(xmlGenericErrorContext,
2011 "converted %d bytes to %d bytes of output\n",
2012 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002013#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002014 break;
2015 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002016#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002017 xmlGenericError(xmlGenericErrorContext,
2018 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002019#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002020 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002021 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002022#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002023 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2024 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002025#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002026 break;
2027 case -2: {
2028 int len = in->use;
2029 const xmlChar *utf = (const xmlChar *) in->content;
2030 int cur;
2031
2032 cur = xmlGetUTF8Char(utf, &len);
2033 if (cur > 0) {
2034 xmlChar charref[20];
2035
2036#ifdef DEBUG_ENCODING
2037 xmlGenericError(xmlGenericErrorContext,
2038 "handling output conversion error\n");
2039 xmlGenericError(xmlGenericErrorContext,
2040 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2041 in->content[0], in->content[1],
2042 in->content[2], in->content[3]);
2043#endif
2044 /*
2045 * Removes the UTF8 sequence, and replace it by a charref
2046 * and continue the transcoding phase, hoping the error
2047 * did not mangle the encoder state.
2048 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002049 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002050 xmlBufferShrink(in, len);
2051 xmlBufferAddHead(in, charref, -1);
2052
2053 goto retry;
2054 } else {
2055 xmlGenericError(xmlGenericErrorContext,
2056 "output conversion failed due to conv error\n");
2057 xmlGenericError(xmlGenericErrorContext,
2058 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2059 in->content[0], in->content[1],
2060 in->content[2], in->content[3]);
2061 in->content[0] = ' ';
2062 }
2063 break;
2064 }
2065 }
2066 return(ret);
2067}
2068
2069/**
2070 * xmlCharEncCloseFunc:
2071 * @handler: char enconding transformation data structure
2072 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002073 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002074 *
2075 * Returns 0 if success, or -1 in case of error
2076 */
2077int
2078xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2079 int ret = 0;
2080 if (handler == NULL) return(-1);
2081 if (handler->name == NULL) return(-1);
2082#ifdef LIBXML_ICONV_ENABLED
2083 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002084 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002085 * and the associated icon resources.
2086 */
2087 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2088 if (handler->name != NULL)
2089 xmlFree(handler->name);
2090 handler->name = NULL;
2091 if (handler->iconv_out != NULL) {
2092 if (iconv_close(handler->iconv_out))
2093 ret = -1;
2094 handler->iconv_out = NULL;
2095 }
2096 if (handler->iconv_in != NULL) {
2097 if (iconv_close(handler->iconv_in))
2098 ret = -1;
2099 handler->iconv_in = NULL;
2100 }
2101 xmlFree(handler);
2102 }
2103#endif /* LIBXML_ICONV_ENABLED */
2104#ifdef DEBUG_ENCODING
2105 if (ret)
2106 xmlGenericError(xmlGenericErrorContext,
2107 "failed to close the encoding handler\n");
2108 else
2109 xmlGenericError(xmlGenericErrorContext,
2110 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002111#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002112
Owen Taylor3473f882001-02-23 17:55:21 +00002113 return(ret);
2114}
2115
Daniel Veillard36711902004-02-11 13:25:26 +00002116/**
2117 * xmlByteConsumed:
2118 * @ctxt: an XML parser context
2119 *
2120 * This function provides the current index of the parser relative
2121 * to the start of the current entity. This function is computed in
2122 * bytes from the beginning starting at zero and finishing at the
2123 * size in byte of the file if parsing a file. The function is
2124 * of constant cost if the input is UTF-8 but can be costly if run
2125 * on non-UTF-8 input.
2126 *
2127 * Returns the index in bytes from the beginning of the entity or -1
2128 * in case the index could not be computed.
2129 */
2130long
2131xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2132 xmlParserInputPtr in;
2133
2134 if (ctxt == NULL) return(-1);
2135 in = ctxt->input;
2136 if (in == NULL) return(-1);
2137 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2138 unsigned int unused = 0;
2139 xmlCharEncodingHandler * handler = in->buf->encoder;
2140 /*
2141 * Encoding conversion, compute the number of unused original
2142 * bytes from the input not consumed and substract that from
2143 * the raw consumed value, this is not a cheap operation
2144 */
2145 if (in->end - in->cur > 0) {
2146 static unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002147 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002148 int toconv = in->end - in->cur, written = 32000;
2149
2150 int ret;
2151
2152 if (handler->output != NULL) {
2153 do {
2154 toconv = in->end - cur;
2155 written = 32000;
2156 ret = handler->output(&convbuf[0], &written,
2157 cur, &toconv);
2158 if (ret == -1) return(-1);
2159 unused += written;
2160 cur += toconv;
2161 } while (ret == -2);
2162#ifdef LIBXML_ICONV_ENABLED
2163 } else if (handler->iconv_out != NULL) {
2164 do {
2165 toconv = in->end - cur;
2166 written = 32000;
2167 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2168 &written, cur, &toconv);
2169 if (ret == -1) {
2170 if (written > 0)
2171 ret = -2;
2172 else
2173 return(-1);
2174 }
2175 unused += written;
2176 cur += toconv;
2177 } while (ret == -2);
2178#endif
2179 } else {
2180 /* could not find a converter */
2181 return(-1);
2182 }
2183 }
2184 if (in->buf->rawconsumed < unused)
2185 return(-1);
2186 return(in->buf->rawconsumed - unused);
2187 }
2188 return(in->consumed + (in->cur - in->base));
2189}
2190
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002191#ifndef LIBXML_ICONV_ENABLED
2192#ifdef LIBXML_ISO8859X_ENABLED
2193
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops early, without error, when
 * @out is full.
 *
 * Layout of @xlattable as used here: bytes 0..31 are indexed by the low
 * 5 bits of a 2-byte lead, bytes 32..47 by the low 4 bits of a 3-byte
 * lead, and blocks of 64 bytes indexed by the low 6 bits of a trailing
 * byte start at offset 48. A final value of 0 means the character is not
 * part of the target character set.
 *
 * Returns the number of bytes written if success, 0 for the
 *     initialization call (@in == NULL), -2 if the transcoding fails, or
 *     -1 otherwise
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* start of the character being decoded, i.e. end of converted input */
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* every character yields exactly one byte, one free slot is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report the input up to, but not including, the offending character */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
2310
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 code points, entry i being the one of input byte
 *                0x80 + i, or 0 when that byte is undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops early, without error, when
 * the next character does not fit in @out anymore.
 *
 * Returns the number of bytes written if success, or -1 if a parameter
 *     is NULL or an undefined input byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    /* *in is only read once in < inend is established */
    while (in < inend) {
        c = *in;
        if (c < 0x80) {
            /* plain ASCII, copied as is */
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                /* two bytes sequence */
                if (outend - out < 2)
                    break;
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* three bytes sequence, table entries are 16 bits wide */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2376
2377
2378/************************************************************************
2379 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2380 ************************************************************************/
2381
/*
 * xmlunicodetable_ISO8859_2: 128 16-bit values.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8()
 * for ISO-8859-2, i.e. entry i is the Unicode code point of byte
 * 0x80 + i - the call site is not visible here, confirm.
 */
2382static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2383 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2384 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2385 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2386 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2387 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2388 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2389 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2390 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2391 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2392 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2393 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2394 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2395 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2396 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2397 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2398 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2399};
2400
/*
 * xmltranscodetable_ISO8859_2: 48 bytes followed by 6 blocks of 64 bytes.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x() for
 * ISO-8859-2, which reads bytes 0..31 and 32..47 as block indexes for
 * 2-byte and 3-byte UTF-8 lead bytes and treats a final 0 as "not in
 * character set" - the call site is not visible here, confirm.
 */
2401static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2402 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2404 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2407 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2408 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2409 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2410 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2411 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2412 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2413 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2414 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2417 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2418 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2419 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2420 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2421 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2422 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2423 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2424 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2425 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2426 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2427 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2428 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2429};
2430
/*
 * xmlunicodetable_ISO8859_3: 128 16-bit values, some of them 0.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8()
 * for ISO-8859-3, i.e. entry i is the Unicode code point of byte
 * 0x80 + i and 0 marks an undefined byte - the call site is not visible
 * here, confirm.
 */
2431static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2432 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2433 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2434 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2435 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2436 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2437 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2438 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2439 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2440 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2441 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2442 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2443 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2444 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2445 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2446 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2447 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2448};
2449
/*
 * xmltranscodetable_ISO8859_3: 48 bytes followed by 7 blocks of 64 bytes.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x() for
 * ISO-8859-3, which reads bytes 0..31 and 32..47 as block indexes for
 * 2-byte and 3-byte UTF-8 lead bytes and treats a final 0 as "not in
 * character set" - the call site is not visible here, confirm.
 */
2450static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2451 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2452 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2458 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2459 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2460 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2461 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2462 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2464 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2465 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2466 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2467 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2469 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2470 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2478 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2479 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2480 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2481 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2482};
2483
/*
 * xmlunicodetable_ISO8859_4: 128 16-bit values.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8()
 * for ISO-8859-4, i.e. entry i is the Unicode code point of byte
 * 0x80 + i - the call site is not visible here, confirm.
 */
2484static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2485 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2486 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2487 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2488 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2489 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2490 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2491 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2492 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2493 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2494 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2495 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2496 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2497 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2498 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2499 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2500 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2501};
2502
/*
 * xmltranscodetable_ISO8859_4: 48 bytes followed by 6 blocks of 64 bytes.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x() for
 * ISO-8859-4, which reads bytes 0..31 and 32..47 as block indexes for
 * 2-byte and 3-byte UTF-8 lead bytes and treats a final 0 as "not in
 * character set" - the call site is not visible here, confirm.
 */
2503static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2504 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2507 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2511 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2512 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2513 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2514 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2515 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2516 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2517 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2518 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2519 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2520 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2521 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2522 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2523 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2524 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2527 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2528 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2529 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2530 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2531};
2532
/*
 * xmlunicodetable_ISO8859_5: 128 16-bit values.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8()
 * for ISO-8859-5, i.e. entry i is the Unicode code point of byte
 * 0x80 + i - the call site is not visible here, confirm.
 */
2533static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2534 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2535 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2536 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2537 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2538 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2539 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2540 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2541 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2542 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2543 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2544 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2545 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2546 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2547 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2548 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2549 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2550};
2551
/*
 * xmltranscodetable_ISO8859_5: 48 bytes followed by 6 blocks of 64 bytes.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x() for
 * ISO-8859-5, which reads bytes 0..31 and 32..47 as block indexes for
 * 2-byte and 3-byte UTF-8 lead bytes and treats a final 0 as "not in
 * character set" - the call site is not visible here, confirm.
 */
2552static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2553 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2554 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2555 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2556 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2560 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2561 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2562 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2564 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2565 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2566 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2567 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2568 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2569 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2572 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2577 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2580};
2581
2582static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2583 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2584 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2585 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2586 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2587 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2588 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2589 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2590 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2591 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2592 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2593 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2594 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2595 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2596 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2597 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2598 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2599};
2600
2601static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2602 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2603 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2604 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2605 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2606 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2607 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2609 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2610 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2611 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2613 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2617 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2619 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2620 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2621 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2622 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2625};
2626
2627static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2628 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2629 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2630 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2631 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2632 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2633 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2634 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2635 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2636 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2637 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2638 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2639 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2640 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2641 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2642 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2643 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2644};
2645
2646static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2647 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2649 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2654 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2655 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2656 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2657 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2658 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2660 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2663 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2665 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2666 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2667 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2668 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2670 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2671 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2672 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2673 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2674 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2675 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678};
2679
2680static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2681 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2682 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2683 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2684 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2685 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2686 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2687 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2688 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2689 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2690 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2691 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2692 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2693 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2694 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2695 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2696 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2697};
2698
2699static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2700 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2701 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2702 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2703 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2707 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2708 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2709 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2710 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2711 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2716 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2718 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2719 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2720 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2723 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2724 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2728 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2729 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2731};
2732
2733static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2734 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2735 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2736 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2737 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2738 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2739 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2740 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2741 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2742 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2743 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2744 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2745 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2746 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2747 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2748 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2749 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2750};
2751
2752static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2753 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2761 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2762 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2763 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2764 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2765 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2766 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2767 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2768 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2769 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2770 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2771 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2772 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2773 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2774 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2775 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2776};
2777
2778static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2779 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2780 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2781 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2782 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2783 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2784 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2785 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2786 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2787 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2788 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2789 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2790 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2791 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2792 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2793 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2794 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2795};
2796
2797static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2798 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2800 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2805 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2806 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2807 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2808 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2809 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2810 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2811 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2812 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2813 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2814 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2815 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2816 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2817 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2818 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2819 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2820 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2821 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2822 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2823 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2824 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2825 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2826 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2827 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2828 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2829};
2830
2831static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2832 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2833 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2834 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2835 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2836 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2837 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2838 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2839 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2840 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2841 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2842 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2843 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2844 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2845 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2846 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2847 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2848};
2849
2850static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2851 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2859 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2860 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2863 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2864 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2866 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2867 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2868 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2869 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2870 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2871 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2872 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2873 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2874 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2875 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2876 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878};
2879
2880static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2881 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2882 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2883 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2884 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2885 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2886 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2887 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2888 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2889 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2890 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2891 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2892 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2893 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2894 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2895 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2896 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2897};
2898
2899static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2900 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2901 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2902 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2908 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2909 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2910 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2911 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2912 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2913 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2919 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2920 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2921 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2922 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2923 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2924 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2925 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2926 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2927 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2928 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2929 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2930 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2931};
2932
2933static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2934 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2935 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2936 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2937 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2938 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2939 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2940 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2941 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2942 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2943 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2944 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2945 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2946 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2947 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2948 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2949 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2950};
2951
2952static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2953 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2961 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2962 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2963 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2964 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
2968 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
2970 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2972 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
2973 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2974 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
2988 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2993 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
2994 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2995 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
2996};
2997
2998static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
2999 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3000 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3001 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3002 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3003 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3004 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3005 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3006 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3007 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3008 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3009 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3010 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3011 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3012 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3013 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3014 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3015};
3016
3017static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3018 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3021 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3026 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3027 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3028 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3029 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3030 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3036 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3038 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3039 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3040 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3041 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3042 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3043 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3044 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3045};
3046
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 sixteen-bit entries, handed to ISO8859xToUTF8() by
 * ISO8859_16ToUTF8().  Entries 0x00..0x1f simply hold 0x0080..0x009f.
 * The leading comment on each row is the array index of its first entry.
 *
 * NOTE(review): presumably entry i is the Unicode code point of the
 * ISO-8859-16 byte (0x80 + i) -- confirm against ISO8859xToUTF8().
 */
static const unsigned short xmlunicodetable_ISO8859_16[128] = {
    /* 0x00 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x08 */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x10 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x18 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0x20 */ 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    /* 0x28 */ 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    /* 0x30 */ 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    /* 0x38 */ 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    /* 0x40 */ 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    /* 0x48 */ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* 0x50 */ 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    /* 0x58 */ 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    /* 0x60 */ 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    /* 0x68 */ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* 0x70 */ 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    /* 0x78 */ 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3065
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_16().
 * The initializer supplies exactly 48 + 9 * 64 = 624 bytes (39 rows of
 * 16), so the array is completely filled and the string literal's
 * terminating NUL is not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index area
 * followed by nine 64-byte sub-tables; the offset comments below follow
 * that reading -- confirm against UTF8ToISO8859x().
 */
static const unsigned char xmltranscodetable_ISO8859_16[48 + 9 * 64] = {
    /* offset 0 .. 47 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 0 * 64 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 1 * 64 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* offset 48 + 2 * 64 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 3 * 64 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* offset 48 + 4 * 64 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 5 * 64 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 6 * 64 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 7 * 64 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offset 48 + 8 * 64 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3107
3108
3109/*
3110 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3111 */
3112
3113static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3114 const unsigned char* in, int *inlen) {
3115 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3116}
3117static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3118 const unsigned char* in, int *inlen) {
3119 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3120}
3121
3122static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3123 const unsigned char* in, int *inlen) {
3124 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3125}
3126static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3127 const unsigned char* in, int *inlen) {
3128 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3129}
3130
3131static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3132 const unsigned char* in, int *inlen) {
3133 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3134}
3135static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3136 const unsigned char* in, int *inlen) {
3137 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3138}
3139
3140static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3141 const unsigned char* in, int *inlen) {
3142 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3143}
3144static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3145 const unsigned char* in, int *inlen) {
3146 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3147}
3148
3149static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3150 const unsigned char* in, int *inlen) {
3151 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3152}
3153static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3154 const unsigned char* in, int *inlen) {
3155 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3156}
3157
3158static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3159 const unsigned char* in, int *inlen) {
3160 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3161}
3162static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3163 const unsigned char* in, int *inlen) {
3164 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3165}
3166
3167static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3168 const unsigned char* in, int *inlen) {
3169 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3170}
3171static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3172 const unsigned char* in, int *inlen) {
3173 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3174}
3175
3176static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3177 const unsigned char* in, int *inlen) {
3178 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3179}
3180static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3181 const unsigned char* in, int *inlen) {
3182 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3183}
3184
3185static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3186 const unsigned char* in, int *inlen) {
3187 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3188}
3189static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3190 const unsigned char* in, int *inlen) {
3191 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3192}
3193
3194static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3195 const unsigned char* in, int *inlen) {
3196 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3197}
3198static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3199 const unsigned char* in, int *inlen) {
3200 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3201}
3202
3203static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3204 const unsigned char* in, int *inlen) {
3205 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3206}
3207static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3208 const unsigned char* in, int *inlen) {
3209 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3210}
3211
3212static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3213 const unsigned char* in, int *inlen) {
3214 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3215}
3216static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3217 const unsigned char* in, int *inlen) {
3218 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3219}
3220
3221static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3222 const unsigned char* in, int *inlen) {
3223 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3224}
3225static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3226 const unsigned char* in, int *inlen) {
3227 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3228}
3229
3230static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3231 const unsigned char* in, int *inlen) {
3232 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3233}
3234static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3235 const unsigned char* in, int *inlen) {
3236 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3237}
3238
3239static void
3240xmlRegisterCharEncodingHandlersISO8859x (void) {
3241 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3242 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3243 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3244 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3245 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3246 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3247 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3248 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3249 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3250 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3251 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3252 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3253 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3254 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3255}
3256
3257#endif
3258#endif
3259
3260