blob: 3f5d8b494915b2a35b03fedc97266fde784c4fb2 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
Daniel Veillard97ac1312001-05-30 19:14:17 +000020 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000021 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
50typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52struct _xmlCharEncodingAlias {
53 const char *name;
54 const char *alias;
55};
56
57static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58static int xmlCharEncodingAliasesNb = 0;
59static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
71static int xmlLittleEndian = 1;
72
Daniel Veillard97ac1312001-05-30 19:14:17 +000073
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every input
 * byte below 0x80 is copied unchanged and produces exactly one output byte.
 *
 * Returns the number of bytes written if success, or -1 if an argument is
 *     NULL or if a byte outside the ASCII range (>= 0x80) is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On a non-ASCII byte both lengths cover the bytes converted before it.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    int count;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    /*
     * The conversion is 1:1, so the number of bytes that can be handled is
     * simply the smaller of the two lengths. A negative length leaves
     * count negative and nothing is converted.
     */
    count = (*inlen < *outlen) ? *inlen : *outlen;

    while (count-- > 0) {
        if (*in >= 0x80) {
            /* not an ASCII byte: report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
130
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000131#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. Only single-byte sequences (values below 0x80) can
 * be represented; any complete multi-byte sequence is a transcoding
 * failure, whether it is well formed, malformed or an overlong form.
 * Calling with @in set to NULL is an initialization call and produces
 * no output.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * A multi-byte sequence cut short by the end of @in is left unconsumed.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned char* outstart = out;
    const unsigned char* outend;
    unsigned int lead;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        lead = *in;
        if (lead < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) lead;
            in++;
            continue;
        }

        if ((lead >= 0xC0) && (lead < 0xF8)) {
            /* valid lead byte: wait for more input if it is incomplete */
            if (lead < 0xE0)
                trailing = 1;
            else if (lead < 0xF0)
                trailing = 2;
            else
                trailing = 3;
            if (inend - (in + 1) < trailing)
                break;
        }

        /*
         * Stray continuation byte, invalid lead byte, or a complete
         * multi-byte sequence: none of these has an ASCII equivalent.
         */
        *outlen = out - outstart;
        *inlen = in - instart;
        return(-2);
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000215#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000216
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * expanded to a two byte sequence. Conversion stops at the first char
 * which does not fit in the remaining output space.
 *
 * Returns the number of bytes written if success, or -1 if any of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst;
    unsigned char *dstLimit;
    const unsigned char *src;
    const unsigned char *srcLimit;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dst = out;
    dstLimit = out + *outlen;
    src = in;
    srcLimit = in + (*inlen);

    while (src < srcLimit) {
        unsigned char ch = *src;

        if (ch < 0x80) {
            /* plain ASCII, one byte out */
            if (dstLimit - dst < 1)
                break;
            *dst++ = ch;
        } else {
            /* upper half of Latin 1, two bytes out */
            if (dstLimit - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
265
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in both
 * buffers are copied verbatim, without looking at sequence boundaries.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the smaller of the two lengths is negative.
 * The values of *outlen and *inlenb after return are both the number of
 * bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy no more than the smaller of the two buffers */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(*outlen);
}
301
Daniel Veillarde72c7562002-05-31 09:47:30 +0000302
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000303#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Calling with @in set to NULL is an initialization
 * call and produces no output.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed sequence or value above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 * counting complete converted characters only.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcLimit;
    const unsigned char *committed;
    unsigned char *dst;
    const unsigned char *dstLimit;
    unsigned int value;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call, nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcLimit = in + (*inlen);
    dst = out;
    dstLimit = out + (*outlen);

    while (src < srcLimit) {
        unsigned int lead = *src++;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* continuation byte in leading position, or invalid lead */
            goto untranslatable;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* sequence cut by the end of the input: leave it for later */
        if (srcLimit - src < extra)
            break;

        while (extra-- > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80)
                goto untranslatable;
            value = (value << 6) | (cont & 0x3F);
        }

        /* no chance for anything above 0xFF in IsoLat1 */
        if (value > 0xFF)
            goto untranslatable;
        if (dst >= dstLimit)
            break;
        *dst++ = (unsigned char) value;
        committed = src;
    }
    *outlen = dst - out;
    *inlen = committed - in;
    return(*outlen);

untranslatable:
    *outlen = dst - out;
    *inlen = committed - in;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000393#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000394
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE chars passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the byte order or alignment rules of the host. A trailing
 * odd byte, or a high surrogate whose partner is not in the buffer yet,
 * is left unconsumed. A character is only written when its whole UTF-8
 * sequence fits in the remaining output space.
 *
 * Returns the number of bytes written, -1 if an argument is NULL or a
 *     length is negative, or -2 if the transcoding fails (unpaired
 *     surrogate in the input).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0xFC00) != 0xD800)
                goto invalid;            /* low surrogate without a high one */
            if (inend - next < 2)
                break;                   /* pair split across buffers */
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            next += 2;
        }

        /* c is a single UCS-4 value, find the size of its UTF-8 form */
        if (c < 0x80)
            needed = 1;
        else if (c < 0x800)
            needed = 2;
        else if (c < 0x10000)
            needed = 3;
        else
            needed = 4;
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(-2);
}
482
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000483#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is emitted. The output is stored
 * byte by byte, so the result does not depend on the byte order or
 * alignment rules of the host. Calling with @in set to NULL is an
 * initialization call and produces no output. A sequence cut short by the
 * end of @in, or a character which does not fit in the remaining output
 * space, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL or a length is negative, or -2 if the transcoding failed
 *     (malformed sequence or value above 0x10FFFF).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned char* out = outb;
    unsigned char* outend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlen < 0))
        return(-1);

    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        next = in;
        d = *next++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence cut by the end of the input: leave it for later */
        if (inend - next < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *next++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        } else {
            /* beyond the range UTF-16 can encode */
            goto invalid;
        }
        in = next;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = in - instart;
    return(-2);
}
591
592/**
William M. Brackf9415e42003-11-28 09:39:10 +0000593 * UTF8ToUTF16:
594 * @outb: a pointer to an array of bytes to store the result
595 * @outlen: the length of @outb
596 * @in: a pointer to an array of UTF-8 chars
597 * @inlen: the length of @in
598 *
599 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
600 * block of chars out.
601 *
602 * Returns the number of bytes written, or -1 if lack of space, or -2
603 * if the transcoding failed.
604 */
605static int
606UTF8ToUTF16(unsigned char* outb, int *outlen,
607 const unsigned char* in, int *inlen)
608{
609 if (in == NULL) {
610 /*
611 * initialization, add the Byte Order Mark for UTF-16LE
612 */
613 if (*outlen >= 2) {
614 outb[0] = 0xFF;
615 outb[1] = 0xFE;
616 *outlen = 2;
617 *inlen = 0;
618#ifdef DEBUG_ENCODING
619 xmlGenericError(xmlGenericErrorContext,
620 "Added FFFE Byte Order Mark\n");
621#endif
622 return(2);
623 }
624 *outlen = 0;
625 *inlen = 0;
626 return(0);
627 }
628 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
629}
William M. Brack030a7a12004-02-10 12:48:57 +0000630#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000631
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE chars passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the byte order or alignment rules of the host. A trailing
 * odd byte, or a high surrogate whose partner is not in the buffer yet,
 * is left unconsumed. A character is only written when its whole UTF-8
 * sequence fits in the remaining output space.
 *
 * Returns the number of bytes written, -1 if an argument is NULL or a
 *     length is negative, or -2 if the transcoding fails (unpaired
 *     surrogate in the input).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0xFC00) != 0xD800)
                goto invalid;            /* low surrogate without a high one */
            if (inend - next < 2)
                break;                   /* pair split across buffers */
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            next += 2;
        }

        /* c is a single UCS-4 value, find the size of its UTF-8 form */
        if (c < 0x80)
            needed = 1;
        else if (c < 0x800)
            needed = 2;
        else if (c < 0x10000)
            needed = 3;
        else
            needed = 4;
        /* never emit a truncated UTF-8 sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(-2);
}
723
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000724#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is emitted. The output is stored
 * byte by byte, so the result does not depend on the byte order or
 * alignment rules of the host. Calling with @in set to NULL is an
 * initialization call and produces no output. A sequence cut short by the
 * end of @in, or a character which does not fit in the remaining output
 * space, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL or a length is negative, or -2 if the transcoding failed
 *     (malformed sequence or value above 0x10FFFF).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * on the failure path as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned char* out = outb;
    unsigned char* outend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlen < 0))
        return(-1);

    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        next = in;
        d = *next++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence cut by the end of the input: leave it for later */
        if (inend - next < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *next++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        } else {
            /* beyond the range UTF-16 can encode */
            goto invalid;
        }
        in = next;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = in - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000829#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000830
Daniel Veillard97ac1312001-05-30 19:14:17 +0000831/************************************************************************
832 * *
833 * Generic encoding handling routines *
834 * *
835 ************************************************************************/
836
Owen Taylor3473f882001-02-23 17:55:21 +0000837/**
838 * xmlDetectCharEncoding:
839 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000840 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000841 * @len: pointer to the length of the buffer
842 *
843 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000844 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000845 *
846 * Returns one of the XML_CHAR_ENCODING_... values.
847 */
848xmlCharEncoding
849xmlDetectCharEncoding(const unsigned char* in, int len)
850{
Daniel Veillardce682bc2004-11-05 17:22:25 +0000851 if (in == NULL)
852 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000853 if (len >= 4) {
854 if ((in[0] == 0x00) && (in[1] == 0x00) &&
855 (in[2] == 0x00) && (in[3] == 0x3C))
856 return(XML_CHAR_ENCODING_UCS4BE);
857 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
858 (in[2] == 0x00) && (in[3] == 0x00))
859 return(XML_CHAR_ENCODING_UCS4LE);
860 if ((in[0] == 0x00) && (in[1] == 0x00) &&
861 (in[2] == 0x3C) && (in[3] == 0x00))
862 return(XML_CHAR_ENCODING_UCS4_2143);
863 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
864 (in[2] == 0x00) && (in[3] == 0x00))
865 return(XML_CHAR_ENCODING_UCS4_3412);
866 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
867 (in[2] == 0xA7) && (in[3] == 0x94))
868 return(XML_CHAR_ENCODING_EBCDIC);
869 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
870 (in[2] == 0x78) && (in[3] == 0x6D))
871 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000872 /*
873 * Although not part of the recommendation, we also
874 * attempt an "auto-recognition" of UTF-16LE and
875 * UTF-16BE encodings.
876 */
877 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
878 (in[2] == 0x3F) && (in[3] == 0x00))
879 return(XML_CHAR_ENCODING_UTF16LE);
880 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
881 (in[2] == 0x00) && (in[3] == 0x3F))
882 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000883 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000884 if (len >= 3) {
885 /*
886 * Errata on XML-1.0 June 20 2001
887 * We now allow an UTF8 encoded BOM
888 */
889 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
890 (in[2] == 0xBF))
891 return(XML_CHAR_ENCODING_UTF8);
892 }
William M. Brackf9415e42003-11-28 09:39:10 +0000893 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000894 if (len >= 2) {
895 if ((in[0] == 0xFE) && (in[1] == 0xFF))
896 return(XML_CHAR_ENCODING_UTF16BE);
897 if ((in[0] == 0xFF) && (in[1] == 0xFE))
898 return(XML_CHAR_ENCODING_UTF16LE);
899 }
900 return(XML_CHAR_ENCODING_NONE);
901}
902
903/**
904 * xmlCleanupEncodingAliases:
905 *
906 * Unregisters all aliases
907 */
908void
909xmlCleanupEncodingAliases(void) {
910 int i;
911
912 if (xmlCharEncodingAliases == NULL)
913 return;
914
915 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
916 if (xmlCharEncodingAliases[i].name != NULL)
917 xmlFree((char *) xmlCharEncodingAliases[i].name);
918 if (xmlCharEncodingAliases[i].alias != NULL)
919 xmlFree((char *) xmlCharEncodingAliases[i].alias);
920 }
921 xmlCharEncodingAliasesNb = 0;
922 xmlCharEncodingAliasesMax = 0;
923 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000924 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000925}
926
927/**
928 * xmlGetEncodingAlias:
929 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
930 *
931 * Lookup an encoding name for the given alias.
932 *
William M. Brackf9415e42003-11-28 09:39:10 +0000933 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000934 */
935const char *
936xmlGetEncodingAlias(const char *alias) {
937 int i;
938 char upper[100];
939
940 if (alias == NULL)
941 return(NULL);
942
943 if (xmlCharEncodingAliases == NULL)
944 return(NULL);
945
946 for (i = 0;i < 99;i++) {
947 upper[i] = toupper(alias[i]);
948 if (upper[i] == 0) break;
949 }
950 upper[i] = 0;
951
952 /*
953 * Walk down the list looking for a definition of the alias
954 */
955 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
956 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
957 return(xmlCharEncodingAliases[i].name);
958 }
959 }
960 return(NULL);
961}
962
963/**
964 * xmlAddEncodingAlias:
965 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
966 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
967 *
William M. Brackf9415e42003-11-28 09:39:10 +0000968 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000969 * will be overwritten.
970 *
971 * Returns 0 in case of success, -1 in case of error
972 */
973int
974xmlAddEncodingAlias(const char *name, const char *alias) {
975 int i;
976 char upper[100];
977
978 if ((name == NULL) || (alias == NULL))
979 return(-1);
980
981 for (i = 0;i < 99;i++) {
982 upper[i] = toupper(alias[i]);
983 if (upper[i] == 0) break;
984 }
985 upper[i] = 0;
986
987 if (xmlCharEncodingAliases == NULL) {
988 xmlCharEncodingAliasesNb = 0;
989 xmlCharEncodingAliasesMax = 20;
990 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
991 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
992 if (xmlCharEncodingAliases == NULL)
993 return(-1);
994 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
995 xmlCharEncodingAliasesMax *= 2;
996 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
997 xmlRealloc(xmlCharEncodingAliases,
998 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
999 }
1000 /*
1001 * Walk down the list looking for a definition of the alias
1002 */
1003 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1004 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1005 /*
1006 * Replace the definition.
1007 */
1008 xmlFree((char *) xmlCharEncodingAliases[i].name);
1009 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1010 return(0);
1011 }
1012 }
1013 /*
1014 * Add the definition
1015 */
1016 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1017 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1018 xmlCharEncodingAliasesNb++;
1019 return(0);
1020}
1021
1022/**
1023 * xmlDelEncodingAlias:
1024 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1025 *
1026 * Unregisters an encoding alias @alias
1027 *
1028 * Returns 0 in case of success, -1 in case of error
1029 */
1030int
1031xmlDelEncodingAlias(const char *alias) {
1032 int i;
1033
1034 if (alias == NULL)
1035 return(-1);
1036
1037 if (xmlCharEncodingAliases == NULL)
1038 return(-1);
1039 /*
1040 * Walk down the list looking for a definition of the alias
1041 */
1042 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1043 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1044 xmlFree((char *) xmlCharEncodingAliases[i].name);
1045 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1046 xmlCharEncodingAliasesNb--;
1047 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1048 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1049 return(0);
1050 }
1051 }
1052 return(-1);
1053}
1054
1055/**
1056 * xmlParseCharEncoding:
1057 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1058 *
William M. Brackf9415e42003-11-28 09:39:10 +00001059 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * that the comparison is case insensitive accordingly to the section
1061 * [XML] 4.3.3 Character Encoding in Entities.
1062 *
1063 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1064 * if not recognized.
1065 */
1066xmlCharEncoding
1067xmlParseCharEncoding(const char* name)
1068{
1069 const char *alias;
1070 char upper[500];
1071 int i;
1072
1073 if (name == NULL)
1074 return(XML_CHAR_ENCODING_NONE);
1075
1076 /*
1077 * Do the alias resolution
1078 */
1079 alias = xmlGetEncodingAlias(name);
1080 if (alias != NULL)
1081 name = alias;
1082
1083 for (i = 0;i < 499;i++) {
1084 upper[i] = toupper(name[i]);
1085 if (upper[i] == 0) break;
1086 }
1087 upper[i] = 0;
1088
1089 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1090 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1091 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1092
1093 /*
1094 * NOTE: if we were able to parse this, the endianness of UTF16 is
1095 * already found and in use
1096 */
1097 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1098 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1099
1100 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1101 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1102 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1103
1104 /*
1105 * NOTE: if we were able to parse this, the endianness of UCS4 is
1106 * already found and in use
1107 */
1108 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1109 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1110 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1111
1112
1113 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1114 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1115 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1116
1117 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1118 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1119 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1120
1121 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1122 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1123 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1124 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1125 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1126 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1127 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1128
1129 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1130 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1131 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1132
1133#ifdef DEBUG_ENCODING
1134 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1135#endif
1136 return(XML_CHAR_ENCODING_ERROR);
1137}
1138
1139/**
1140 * xmlGetCharEncodingName:
1141 * @enc: the encoding
1142 *
1143 * The "canonical" name for XML encoding.
1144 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1145 * Section 4.3.3 Character Encoding in Entities
1146 *
1147 * Returns the canonical name for the given encoding
1148 */
1149
1150const char*
1151xmlGetCharEncodingName(xmlCharEncoding enc) {
1152 switch (enc) {
1153 case XML_CHAR_ENCODING_ERROR:
1154 return(NULL);
1155 case XML_CHAR_ENCODING_NONE:
1156 return(NULL);
1157 case XML_CHAR_ENCODING_UTF8:
1158 return("UTF-8");
1159 case XML_CHAR_ENCODING_UTF16LE:
1160 return("UTF-16");
1161 case XML_CHAR_ENCODING_UTF16BE:
1162 return("UTF-16");
1163 case XML_CHAR_ENCODING_EBCDIC:
1164 return("EBCDIC");
1165 case XML_CHAR_ENCODING_UCS4LE:
1166 return("ISO-10646-UCS-4");
1167 case XML_CHAR_ENCODING_UCS4BE:
1168 return("ISO-10646-UCS-4");
1169 case XML_CHAR_ENCODING_UCS4_2143:
1170 return("ISO-10646-UCS-4");
1171 case XML_CHAR_ENCODING_UCS4_3412:
1172 return("ISO-10646-UCS-4");
1173 case XML_CHAR_ENCODING_UCS2:
1174 return("ISO-10646-UCS-2");
1175 case XML_CHAR_ENCODING_8859_1:
1176 return("ISO-8859-1");
1177 case XML_CHAR_ENCODING_8859_2:
1178 return("ISO-8859-2");
1179 case XML_CHAR_ENCODING_8859_3:
1180 return("ISO-8859-3");
1181 case XML_CHAR_ENCODING_8859_4:
1182 return("ISO-8859-4");
1183 case XML_CHAR_ENCODING_8859_5:
1184 return("ISO-8859-5");
1185 case XML_CHAR_ENCODING_8859_6:
1186 return("ISO-8859-6");
1187 case XML_CHAR_ENCODING_8859_7:
1188 return("ISO-8859-7");
1189 case XML_CHAR_ENCODING_8859_8:
1190 return("ISO-8859-8");
1191 case XML_CHAR_ENCODING_8859_9:
1192 return("ISO-8859-9");
1193 case XML_CHAR_ENCODING_2022_JP:
1194 return("ISO-2022-JP");
1195 case XML_CHAR_ENCODING_SHIFT_JIS:
1196 return("Shift-JIS");
1197 case XML_CHAR_ENCODING_EUC_JP:
1198 return("EUC-JP");
1199 case XML_CHAR_ENCODING_ASCII:
1200 return(NULL);
1201 }
1202 return(NULL);
1203}
1204
Daniel Veillard97ac1312001-05-30 19:14:17 +00001205/************************************************************************
1206 * *
1207 * Char encoding handlers *
1208 * *
1209 ************************************************************************/
1210
Owen Taylor3473f882001-02-23 17:55:21 +00001211
1212/* the size should be growable, but it's not a big deal ... */
1213#define MAX_ENCODING_HANDLERS 50
1214static xmlCharEncodingHandlerPtr *handlers = NULL;
1215static int nbCharEncodingHandler = 0;
1216
1217/*
1218 * The default is UTF-8 for XML, that's also the default used for the
1219 * parser internals, so the default encoding handler is NULL
1220 */
1221
1222static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1223
1224/**
1225 * xmlNewCharEncodingHandler:
1226 * @name: the encoding name, in UTF-8 format (ASCII actually)
1227 * @input: the xmlCharEncodingInputFunc to read that encoding
1228 * @output: the xmlCharEncodingOutputFunc to write that encoding
1229 *
1230 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001231 *
Owen Taylor3473f882001-02-23 17:55:21 +00001232 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1233 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001234xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001235xmlNewCharEncodingHandler(const char *name,
1236 xmlCharEncodingInputFunc input,
1237 xmlCharEncodingOutputFunc output) {
1238 xmlCharEncodingHandlerPtr handler;
1239 const char *alias;
1240 char upper[500];
1241 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001242 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001243
1244 /*
1245 * Do the alias resolution
1246 */
1247 alias = xmlGetEncodingAlias(name);
1248 if (alias != NULL)
1249 name = alias;
1250
1251 /*
1252 * Keep only the uppercase version of the encoding.
1253 */
1254 if (name == NULL) {
1255 xmlGenericError(xmlGenericErrorContext,
1256 "xmlNewCharEncodingHandler : no name !\n");
1257 return(NULL);
1258 }
1259 for (i = 0;i < 499;i++) {
1260 upper[i] = toupper(name[i]);
1261 if (upper[i] == 0) break;
1262 }
1263 upper[i] = 0;
1264 up = xmlMemStrdup(upper);
1265 if (up == NULL) {
1266 xmlGenericError(xmlGenericErrorContext,
1267 "xmlNewCharEncodingHandler : out of memory !\n");
1268 return(NULL);
1269 }
1270
1271 /*
1272 * allocate and fill-up an handler block.
1273 */
1274 handler = (xmlCharEncodingHandlerPtr)
1275 xmlMalloc(sizeof(xmlCharEncodingHandler));
1276 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001277 xmlFree(up);
Owen Taylor3473f882001-02-23 17:55:21 +00001278 xmlGenericError(xmlGenericErrorContext,
1279 "xmlNewCharEncodingHandler : out of memory !\n");
1280 return(NULL);
1281 }
1282 handler->input = input;
1283 handler->output = output;
1284 handler->name = up;
1285
1286#ifdef LIBXML_ICONV_ENABLED
1287 handler->iconv_in = NULL;
1288 handler->iconv_out = NULL;
1289#endif /* LIBXML_ICONV_ENABLED */
1290
1291 /*
1292 * registers and returns the handler.
1293 */
1294 xmlRegisterCharEncodingHandler(handler);
1295#ifdef DEBUG_ENCODING
1296 xmlGenericError(xmlGenericErrorContext,
1297 "Registered encoding handler for %s\n", name);
1298#endif
1299 return(handler);
1300}
1301
1302/**
1303 * xmlInitCharEncodingHandlers:
1304 *
1305 * Initialize the char encoding support, it registers the default
1306 * encoding supported.
1307 * NOTE: while public, this function usually doesn't need to be called
1308 * in normal processing.
1309 */
1310void
1311xmlInitCharEncodingHandlers(void) {
1312 unsigned short int tst = 0x1234;
1313 unsigned char *ptr = (unsigned char *) &tst;
1314
1315 if (handlers != NULL) return;
1316
1317 handlers = (xmlCharEncodingHandlerPtr *)
1318 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1319
1320 if (*ptr == 0x12) xmlLittleEndian = 0;
1321 else if (*ptr == 0x34) xmlLittleEndian = 1;
1322 else xmlGenericError(xmlGenericErrorContext,
1323 "Odd problem at endianness detection\n");
1324
1325 if (handlers == NULL) {
1326 xmlGenericError(xmlGenericErrorContext,
1327 "xmlInitCharEncodingHandlers : out of memory !\n");
1328 return;
1329 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001330 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001331#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001332 xmlUTF16LEHandler =
1333 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1334 xmlUTF16BEHandler =
1335 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001336 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001337 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1338 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001339 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001340#ifdef LIBXML_HTML_ENABLED
1341 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1342#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001343#else
1344 xmlUTF16LEHandler =
1345 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1346 xmlUTF16BEHandler =
1347 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001348 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001349 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1350 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1351 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1352#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001353#ifndef LIBXML_ICONV_ENABLED
1354#ifdef LIBXML_ISO8859X_ENABLED
1355 xmlRegisterCharEncodingHandlersISO8859x ();
1356#endif
1357#endif
1358
Owen Taylor3473f882001-02-23 17:55:21 +00001359}
1360
1361/**
1362 * xmlCleanupCharEncodingHandlers:
1363 *
1364 * Cleanup the memory allocated for the char encoding support, it
1365 * unregisters all the encoding handlers and the aliases.
1366 */
1367void
1368xmlCleanupCharEncodingHandlers(void) {
1369 xmlCleanupEncodingAliases();
1370
1371 if (handlers == NULL) return;
1372
1373 for (;nbCharEncodingHandler > 0;) {
1374 nbCharEncodingHandler--;
1375 if (handlers[nbCharEncodingHandler] != NULL) {
1376 if (handlers[nbCharEncodingHandler]->name != NULL)
1377 xmlFree(handlers[nbCharEncodingHandler]->name);
1378 xmlFree(handlers[nbCharEncodingHandler]);
1379 }
1380 }
1381 xmlFree(handlers);
1382 handlers = NULL;
1383 nbCharEncodingHandler = 0;
1384 xmlDefaultCharEncodingHandler = NULL;
1385}
1386
1387/**
1388 * xmlRegisterCharEncodingHandler:
1389 * @handler: the xmlCharEncodingHandlerPtr handler block
1390 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001391 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001392 */
1393void
1394xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1395 if (handlers == NULL) xmlInitCharEncodingHandlers();
1396 if (handler == NULL) {
1397 xmlGenericError(xmlGenericErrorContext,
1398 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1399 return;
1400 }
1401
1402 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1403 xmlGenericError(xmlGenericErrorContext,
1404 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1405 xmlGenericError(xmlGenericErrorContext,
1406 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1407 return;
1408 }
1409 handlers[nbCharEncodingHandler++] = handler;
1410}
1411
1412/**
1413 * xmlGetCharEncodingHandler:
1414 * @enc: an xmlCharEncoding value.
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 *
1418 * Returns the handler or NULL if not found
1419 */
1420xmlCharEncodingHandlerPtr
1421xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1422 xmlCharEncodingHandlerPtr handler;
1423
1424 if (handlers == NULL) xmlInitCharEncodingHandlers();
1425 switch (enc) {
1426 case XML_CHAR_ENCODING_ERROR:
1427 return(NULL);
1428 case XML_CHAR_ENCODING_NONE:
1429 return(NULL);
1430 case XML_CHAR_ENCODING_UTF8:
1431 return(NULL);
1432 case XML_CHAR_ENCODING_UTF16LE:
1433 return(xmlUTF16LEHandler);
1434 case XML_CHAR_ENCODING_UTF16BE:
1435 return(xmlUTF16BEHandler);
1436 case XML_CHAR_ENCODING_EBCDIC:
1437 handler = xmlFindCharEncodingHandler("EBCDIC");
1438 if (handler != NULL) return(handler);
1439 handler = xmlFindCharEncodingHandler("ebcdic");
1440 if (handler != NULL) return(handler);
1441 break;
1442 case XML_CHAR_ENCODING_UCS4BE:
1443 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1444 if (handler != NULL) return(handler);
1445 handler = xmlFindCharEncodingHandler("UCS-4");
1446 if (handler != NULL) return(handler);
1447 handler = xmlFindCharEncodingHandler("UCS4");
1448 if (handler != NULL) return(handler);
1449 break;
1450 case XML_CHAR_ENCODING_UCS4LE:
1451 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1452 if (handler != NULL) return(handler);
1453 handler = xmlFindCharEncodingHandler("UCS-4");
1454 if (handler != NULL) return(handler);
1455 handler = xmlFindCharEncodingHandler("UCS4");
1456 if (handler != NULL) return(handler);
1457 break;
1458 case XML_CHAR_ENCODING_UCS4_2143:
1459 break;
1460 case XML_CHAR_ENCODING_UCS4_3412:
1461 break;
1462 case XML_CHAR_ENCODING_UCS2:
1463 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1464 if (handler != NULL) return(handler);
1465 handler = xmlFindCharEncodingHandler("UCS-2");
1466 if (handler != NULL) return(handler);
1467 handler = xmlFindCharEncodingHandler("UCS2");
1468 if (handler != NULL) return(handler);
1469 break;
1470
1471 /*
1472 * We used to keep ISO Latin encodings native in the
1473 * generated data. This led to so many problems that
1474 * this has been removed. One can still change this
1475 * back by registering no-ops encoders for those
1476 */
1477 case XML_CHAR_ENCODING_8859_1:
1478 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1479 if (handler != NULL) return(handler);
1480 break;
1481 case XML_CHAR_ENCODING_8859_2:
1482 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1483 if (handler != NULL) return(handler);
1484 break;
1485 case XML_CHAR_ENCODING_8859_3:
1486 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1487 if (handler != NULL) return(handler);
1488 break;
1489 case XML_CHAR_ENCODING_8859_4:
1490 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1491 if (handler != NULL) return(handler);
1492 break;
1493 case XML_CHAR_ENCODING_8859_5:
1494 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1495 if (handler != NULL) return(handler);
1496 break;
1497 case XML_CHAR_ENCODING_8859_6:
1498 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1499 if (handler != NULL) return(handler);
1500 break;
1501 case XML_CHAR_ENCODING_8859_7:
1502 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1503 if (handler != NULL) return(handler);
1504 break;
1505 case XML_CHAR_ENCODING_8859_8:
1506 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1507 if (handler != NULL) return(handler);
1508 break;
1509 case XML_CHAR_ENCODING_8859_9:
1510 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1511 if (handler != NULL) return(handler);
1512 break;
1513
1514
1515 case XML_CHAR_ENCODING_2022_JP:
1516 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1517 if (handler != NULL) return(handler);
1518 break;
1519 case XML_CHAR_ENCODING_SHIFT_JIS:
1520 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1521 if (handler != NULL) return(handler);
1522 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1523 if (handler != NULL) return(handler);
1524 handler = xmlFindCharEncodingHandler("Shift_JIS");
1525 if (handler != NULL) return(handler);
1526 break;
1527 case XML_CHAR_ENCODING_EUC_JP:
1528 handler = xmlFindCharEncodingHandler("EUC-JP");
1529 if (handler != NULL) return(handler);
1530 break;
1531 default:
1532 break;
1533 }
1534
1535#ifdef DEBUG_ENCODING
1536 xmlGenericError(xmlGenericErrorContext,
1537 "No handler found for encoding %d\n", enc);
1538#endif
1539 return(NULL);
1540}
1541
1542/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001543 * xmlFindCharEncodingHandler:
1544 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001545 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001546 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001547 *
1548 * Returns the handler or NULL if not found
1549 */
1550xmlCharEncodingHandlerPtr
1551xmlFindCharEncodingHandler(const char *name) {
1552 const char *nalias;
1553 const char *norig;
1554 xmlCharEncoding alias;
1555#ifdef LIBXML_ICONV_ENABLED
1556 xmlCharEncodingHandlerPtr enc;
1557 iconv_t icv_in, icv_out;
1558#endif /* LIBXML_ICONV_ENABLED */
1559 char upper[100];
1560 int i;
1561
1562 if (handlers == NULL) xmlInitCharEncodingHandlers();
1563 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1564 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1565
1566 /*
1567 * Do the alias resolution
1568 */
1569 norig = name;
1570 nalias = xmlGetEncodingAlias(name);
1571 if (nalias != NULL)
1572 name = nalias;
1573
1574 /*
1575 * Check first for directly registered encoding names
1576 */
1577 for (i = 0;i < 99;i++) {
1578 upper[i] = toupper(name[i]);
1579 if (upper[i] == 0) break;
1580 }
1581 upper[i] = 0;
1582
1583 for (i = 0;i < nbCharEncodingHandler; i++)
1584 if (!strcmp(upper, handlers[i]->name)) {
1585#ifdef DEBUG_ENCODING
1586 xmlGenericError(xmlGenericErrorContext,
1587 "Found registered handler for encoding %s\n", name);
1588#endif
1589 return(handlers[i]);
1590 }
1591
1592#ifdef LIBXML_ICONV_ENABLED
1593 /* check whether iconv can handle this */
1594 icv_in = iconv_open("UTF-8", name);
1595 icv_out = iconv_open(name, "UTF-8");
1596 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1597 enc = (xmlCharEncodingHandlerPtr)
1598 xmlMalloc(sizeof(xmlCharEncodingHandler));
1599 if (enc == NULL) {
1600 iconv_close(icv_in);
1601 iconv_close(icv_out);
1602 return(NULL);
1603 }
1604 enc->name = xmlMemStrdup(name);
1605 enc->input = NULL;
1606 enc->output = NULL;
1607 enc->iconv_in = icv_in;
1608 enc->iconv_out = icv_out;
1609#ifdef DEBUG_ENCODING
1610 xmlGenericError(xmlGenericErrorContext,
1611 "Found iconv handler for encoding %s\n", name);
1612#endif
1613 return enc;
1614 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1615 xmlGenericError(xmlGenericErrorContext,
1616 "iconv : problems with filters for '%s'\n", name);
1617 }
1618#endif /* LIBXML_ICONV_ENABLED */
1619
1620#ifdef DEBUG_ENCODING
1621 xmlGenericError(xmlGenericErrorContext,
1622 "No handler found for encoding %s\n", name);
1623#endif
1624
1625 /*
1626 * Fallback using the canonical names
1627 */
1628 alias = xmlParseCharEncoding(norig);
1629 if (alias != XML_CHAR_ENCODING_ERROR) {
1630 const char* canon;
1631 canon = xmlGetCharEncodingName(alias);
1632 if ((canon != NULL) && (strcmp(name, canon))) {
1633 return(xmlFindCharEncodingHandler(canon));
1634 }
1635 }
1636
William M. Brackf9415e42003-11-28 09:39:10 +00001637 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001638 return(NULL);
1639}
1640
Daniel Veillard97ac1312001-05-30 19:14:17 +00001641/************************************************************************
1642 * *
1643 * ICONV based generic conversion functions *
1644 * *
1645 ************************************************************************/
1646
Owen Taylor3473f882001-02-23 17:55:21 +00001647#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space or if one of the arguments is NULL (in that
 *        case *@outlen is set to 0 when @outlen is not NULL), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or cannot be represented in the target encoding), or
 *     -3 if the input ends with an incomplete sequence or for any
 *        other failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;    /* iconv() returns a size_t, (size_t) -1 on error */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);

    /* iconv() left the remaining counts behind: turn them into used counts */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;

    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return(-2);
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return(-1);
#endif
        /* EINVAL (truncated input) and anything else */
        return(-3);
    }
    return(0);
}
1710#endif /* LIBXML_ICONV_ENABLED */
1711
Daniel Veillard97ac1312001-05-30 19:14:17 +00001712/************************************************************************
1713 * *
1714 * The real API used by libxml for on-the-fly conversion *
1715 * *
1716 ************************************************************************/
1717
Owen Taylor3473f882001-02-23 17:55:21 +00001718/**
1719 * xmlCharEncFirstLine:
1720 * @handler: char enconding transformation data structure
1721 * @out: an xmlBuffer for the output.
1722 * @in: an xmlBuffer for the input
1723 *
1724 * Front-end for the encoding handler input function, but handle only
1725 * the very first line, i.e. limit itself to 45 chars.
1726 *
1727 * Returns the number of byte written if success, or
1728 * -1 general error
1729 * -2 if the transcoding fails (for *in is not valid utf8 string or
1730 * the result of transformation can't fit into the encoding we want), or
1731 */
1732int
1733xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1734 xmlBufferPtr in) {
1735 int ret = -2;
1736 int written;
1737 int toconv;
1738
1739 if (handler == NULL) return(-1);
1740 if (out == NULL) return(-1);
1741 if (in == NULL) return(-1);
1742
1743 written = out->size - out->use;
1744 toconv = in->use;
1745 if (toconv * 2 >= written) {
1746 xmlBufferGrow(out, toconv);
1747 written = out->size - out->use - 1;
1748 }
1749
1750 /*
1751 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1752 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001753 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001754 */
1755 written = 45;
1756
1757 if (handler->input != NULL) {
1758 ret = handler->input(&out->content[out->use], &written,
1759 in->content, &toconv);
1760 xmlBufferShrink(in, toconv);
1761 out->use += written;
1762 out->content[out->use] = 0;
1763 }
1764#ifdef LIBXML_ICONV_ENABLED
1765 else if (handler->iconv_in != NULL) {
1766 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1767 &written, in->content, &toconv);
1768 xmlBufferShrink(in, toconv);
1769 out->use += written;
1770 out->content[out->use] = 0;
1771 if (ret == -1) ret = -3;
1772 }
1773#endif /* LIBXML_ICONV_ENABLED */
1774#ifdef DEBUG_ENCODING
1775 switch (ret) {
1776 case 0:
1777 xmlGenericError(xmlGenericErrorContext,
1778 "converted %d bytes to %d bytes of input\n",
1779 toconv, written);
1780 break;
1781 case -1:
1782 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1783 toconv, written, in->use);
1784 break;
1785 case -2:
1786 xmlGenericError(xmlGenericErrorContext,
1787 "input conversion failed due to input error\n");
1788 break;
1789 case -3:
1790 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1791 toconv, written, in->use);
1792 break;
1793 default:
1794 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1795 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001796#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001797 /*
1798 * Ignore when input buffer is not on a boundary
1799 */
1800 if (ret == -3) ret = 0;
1801 if (ret == -1) ret = 0;
1802 return(ret);
1803}
1804
1805/**
1806 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001807 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001808 * @out: an xmlBuffer for the output.
1809 * @in: an xmlBuffer for the input
1810 *
1811 * Generic front-end for the encoding handler input function
1812 *
1813 * Returns the number of byte written if success, or
1814 * -1 general error
1815 * -2 if the transcoding fails (for *in is not valid utf8 string or
1816 * the result of transformation can't fit into the encoding we want), or
1817 */
1818int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001819xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1820 xmlBufferPtr in)
1821{
Owen Taylor3473f882001-02-23 17:55:21 +00001822 int ret = -2;
1823 int written;
1824 int toconv;
1825
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001826 if (handler == NULL)
1827 return (-1);
1828 if (out == NULL)
1829 return (-1);
1830 if (in == NULL)
1831 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001832
1833 toconv = in->use;
1834 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001835 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001836 written = out->size - out->use;
1837 if (toconv * 2 >= written) {
1838 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001839 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001842 ret = handler->input(&out->content[out->use], &written,
1843 in->content, &toconv);
1844 xmlBufferShrink(in, toconv);
1845 out->use += written;
1846 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001847 }
1848#ifdef LIBXML_ICONV_ENABLED
1849 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001850 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1851 &written, in->content, &toconv);
1852 xmlBufferShrink(in, toconv);
1853 out->use += written;
1854 out->content[out->use] = 0;
1855 if (ret == -1)
1856 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858#endif /* LIBXML_ICONV_ENABLED */
1859 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001860 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001861#ifdef DEBUG_ENCODING
1862 xmlGenericError(xmlGenericErrorContext,
1863 "converted %d bytes to %d bytes of input\n",
1864 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001865#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001866 break;
1867 case -1:
1868#ifdef DEBUG_ENCODING
1869 xmlGenericError(xmlGenericErrorContext,
1870 "converted %d bytes to %d bytes of input, %d left\n",
1871 toconv, written, in->use);
1872#endif
1873 break;
1874 case -3:
1875#ifdef DEBUG_ENCODING
1876 xmlGenericError(xmlGenericErrorContext,
1877 "converted %d bytes to %d bytes of input, %d left\n",
1878 toconv, written, in->use);
1879#endif
1880 break;
Owen Taylor3473f882001-02-23 17:55:21 +00001881 case -2:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001882 xmlGenericError(xmlGenericErrorContext,
1883 "input conversion failed due to input error\n");
1884 xmlGenericError(xmlGenericErrorContext,
1885 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1886 in->content[0], in->content[1],
1887 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00001888 }
1889 /*
1890 * Ignore when input buffer is not on a boundary
1891 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001892 if (ret == -3)
1893 ret = 0;
Daniel Veillardd076a202002-11-20 13:28:31 +00001894 return (written);
Owen Taylor3473f882001-02-23 17:55:21 +00001895}
1896
1897/**
1898 * xmlCharEncOutFunc:
1899 * @handler: char enconding transformation data structure
1900 * @out: an xmlBuffer for the output.
1901 * @in: an xmlBuffer for the input
1902 *
1903 * Generic front-end for the encoding handler output function
1904 * a first call with @in == NULL has to be made firs to initiate the
1905 * output in case of non-stateless encoding needing to initiate their
1906 * state or the output (like the BOM in UTF16).
1907 * In case of UTF8 sequence conversion errors for the given encoder,
1908 * the content will be automatically remapped to a CharRef sequence.
1909 *
1910 * Returns the number of byte written if success, or
1911 * -1 general error
1912 * -2 if the transcoding fails (for *in is not valid utf8 string or
1913 * the result of transformation can't fit into the encoding we want), or
1914 */
1915int
1916xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1917 xmlBufferPtr in) {
1918 int ret = -2;
1919 int written;
1920 int writtentot = 0;
1921 int toconv;
1922 int output = 0;
1923
1924 if (handler == NULL) return(-1);
1925 if (out == NULL) return(-1);
1926
1927retry:
1928
1929 written = out->size - out->use;
1930
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001931 if (written > 0)
1932 written--; /* Gennady: count '/0' */
1933
Owen Taylor3473f882001-02-23 17:55:21 +00001934 /*
1935 * First specific handling of in = NULL, i.e. the initialization call
1936 */
1937 if (in == NULL) {
1938 toconv = 0;
1939 if (handler->output != NULL) {
1940 ret = handler->output(&out->content[out->use], &written,
1941 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001942 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001943 out->use += written;
1944 out->content[out->use] = 0;
1945 }
Owen Taylor3473f882001-02-23 17:55:21 +00001946 }
1947#ifdef LIBXML_ICONV_ENABLED
1948 else if (handler->iconv_out != NULL) {
1949 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1950 &written, NULL, &toconv);
1951 out->use += written;
1952 out->content[out->use] = 0;
1953 }
1954#endif /* LIBXML_ICONV_ENABLED */
1955#ifdef DEBUG_ENCODING
1956 xmlGenericError(xmlGenericErrorContext,
1957 "initialized encoder\n");
1958#endif
1959 return(0);
1960 }
1961
1962 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001963 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001964 */
1965 toconv = in->use;
1966 if (toconv == 0)
1967 return(0);
1968 if (toconv * 2 >= written) {
1969 xmlBufferGrow(out, toconv * 2);
1970 written = out->size - out->use - 1;
1971 }
1972 if (handler->output != NULL) {
1973 ret = handler->output(&out->content[out->use], &written,
1974 in->content, &toconv);
1975 xmlBufferShrink(in, toconv);
1976 out->use += written;
1977 writtentot += written;
1978 out->content[out->use] = 0;
1979 }
1980#ifdef LIBXML_ICONV_ENABLED
1981 else if (handler->iconv_out != NULL) {
1982 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1983 &written, in->content, &toconv);
1984 xmlBufferShrink(in, toconv);
1985 out->use += written;
1986 writtentot += written;
1987 out->content[out->use] = 0;
1988 if (ret == -1) {
1989 if (written > 0) {
1990 /*
1991 * Can be a limitation of iconv
1992 */
1993 goto retry;
1994 }
1995 ret = -3;
1996 }
1997 }
1998#endif /* LIBXML_ICONV_ENABLED */
1999 else {
2000 xmlGenericError(xmlGenericErrorContext,
2001 "xmlCharEncOutFunc: no output function !\n");
2002 return(-1);
2003 }
2004
2005 if (ret >= 0) output += ret;
2006
2007 /*
2008 * Attempt to handle error cases
2009 */
2010 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002011 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002012#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002013 xmlGenericError(xmlGenericErrorContext,
2014 "converted %d bytes to %d bytes of output\n",
2015 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002016#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002017 break;
2018 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002019#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002020 xmlGenericError(xmlGenericErrorContext,
2021 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002022#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002023 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002024 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002025#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002026 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2027 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002028#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002029 break;
2030 case -2: {
2031 int len = in->use;
2032 const xmlChar *utf = (const xmlChar *) in->content;
2033 int cur;
2034
2035 cur = xmlGetUTF8Char(utf, &len);
2036 if (cur > 0) {
2037 xmlChar charref[20];
2038
2039#ifdef DEBUG_ENCODING
2040 xmlGenericError(xmlGenericErrorContext,
2041 "handling output conversion error\n");
2042 xmlGenericError(xmlGenericErrorContext,
2043 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2044 in->content[0], in->content[1],
2045 in->content[2], in->content[3]);
2046#endif
2047 /*
2048 * Removes the UTF8 sequence, and replace it by a charref
2049 * and continue the transcoding phase, hoping the error
2050 * did not mangle the encoder state.
2051 */
Aleksey Sanin49cc9752002-06-14 17:07:10 +00002052 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002053 xmlBufferShrink(in, len);
2054 xmlBufferAddHead(in, charref, -1);
2055
2056 goto retry;
2057 } else {
2058 xmlGenericError(xmlGenericErrorContext,
2059 "output conversion failed due to conv error\n");
2060 xmlGenericError(xmlGenericErrorContext,
2061 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2062 in->content[0], in->content[1],
2063 in->content[2], in->content[3]);
2064 in->content[0] = ' ';
2065 }
2066 break;
2067 }
2068 }
2069 return(ret);
2070}
2071
2072/**
2073 * xmlCharEncCloseFunc:
2074 * @handler: char enconding transformation data structure
2075 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002076 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002077 *
2078 * Returns 0 if success, or -1 in case of error
2079 */
2080int
2081xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2082 int ret = 0;
2083 if (handler == NULL) return(-1);
2084 if (handler->name == NULL) return(-1);
2085#ifdef LIBXML_ICONV_ENABLED
2086 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002087 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002088 * and the associated icon resources.
2089 */
2090 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2091 if (handler->name != NULL)
2092 xmlFree(handler->name);
2093 handler->name = NULL;
2094 if (handler->iconv_out != NULL) {
2095 if (iconv_close(handler->iconv_out))
2096 ret = -1;
2097 handler->iconv_out = NULL;
2098 }
2099 if (handler->iconv_in != NULL) {
2100 if (iconv_close(handler->iconv_in))
2101 ret = -1;
2102 handler->iconv_in = NULL;
2103 }
2104 xmlFree(handler);
2105 }
2106#endif /* LIBXML_ICONV_ENABLED */
2107#ifdef DEBUG_ENCODING
2108 if (ret)
2109 xmlGenericError(xmlGenericErrorContext,
2110 "failed to close the encoding handler\n");
2111 else
2112 xmlGenericError(xmlGenericErrorContext,
2113 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002114#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002115
Owen Taylor3473f882001-02-23 17:55:21 +00002116 return(ret);
2117}
2118
Daniel Veillard36711902004-02-11 13:25:26 +00002119/**
2120 * xmlByteConsumed:
2121 * @ctxt: an XML parser context
2122 *
2123 * This function provides the current index of the parser relative
2124 * to the start of the current entity. This function is computed in
2125 * bytes from the beginning starting at zero and finishing at the
2126 * size in byte of the file if parsing a file. The function is
2127 * of constant cost if the input is UTF-8 but can be costly if run
2128 * on non-UTF-8 input.
2129 *
2130 * Returns the index in bytes from the beginning of the entity or -1
2131 * in case the index could not be computed.
2132 */
2133long
2134xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2135 xmlParserInputPtr in;
2136
2137 if (ctxt == NULL) return(-1);
2138 in = ctxt->input;
2139 if (in == NULL) return(-1);
2140 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2141 unsigned int unused = 0;
2142 xmlCharEncodingHandler * handler = in->buf->encoder;
2143 /*
2144 * Encoding conversion, compute the number of unused original
2145 * bytes from the input not consumed and substract that from
2146 * the raw consumed value, this is not a cheap operation
2147 */
2148 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002149 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002150 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002151 int toconv = in->end - in->cur, written = 32000;
2152
2153 int ret;
2154
2155 if (handler->output != NULL) {
2156 do {
2157 toconv = in->end - cur;
2158 written = 32000;
2159 ret = handler->output(&convbuf[0], &written,
2160 cur, &toconv);
2161 if (ret == -1) return(-1);
2162 unused += written;
2163 cur += toconv;
2164 } while (ret == -2);
2165#ifdef LIBXML_ICONV_ENABLED
2166 } else if (handler->iconv_out != NULL) {
2167 do {
2168 toconv = in->end - cur;
2169 written = 32000;
2170 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2171 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002172 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002173 if (written > 0)
2174 ret = -2;
2175 else
2176 return(-1);
2177 }
2178 unused += written;
2179 cur += toconv;
2180 } while (ret == -2);
2181#endif
2182 } else {
2183 /* could not find a converter */
2184 return(-1);
2185 }
2186 }
2187 if (in->buf->rawconsumed < unused)
2188 return(-1);
2189 return(in->buf->rawconsumed - unused);
2190 }
2191 return(in->consumed + (in->cur - in->base));
2192}
2193
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002194#ifndef LIBXML_ICONV_ENABLED
2195#ifdef LIBXML_ISO8859X_ENABLED
2196
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops, without error, as soon as
 * @out is full.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails, or -1 otherwise.
 * The value of @inlen after return is the number of octets consumed;
 * on a -2 return it is the offset of the sequence that could not be
 * converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* seq = in;     /* start of the current sequence */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every sequence yields exactly one byte: stop when out is full */
        if (out >= outend)
            break;
        seq = in;
        d = *in++;
        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto reject;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto reject;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto reject;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto reject;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto reject;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto reject;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto reject;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto reject;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto reject;
        }
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);

reject:
    /* report everything converted before the offending sequence */
    *outlen = out - outstart;
    *inlen = seq - instart;
    return(-2);
}
2313
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: the code points for the bytes 0x80..0xFF, 128 entries,
 *                a 0 entry marking an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops, without error, before the
 * first character which does not fit entirely in @out.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 * (NULL argument or byte undefined in the character set).
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    while (in < inend) {
        /* the byte is only read once known to be inside the input */
        c = *in;
        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /* any byte >= 0x80 needs at least two bytes of output */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* three byte sequence: make sure it fits entirely */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2380
2381
2382/************************************************************************
2383 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2384 ************************************************************************/
2385
/*
 * ISO-8859-2 to Unicode: code point for each byte 0x80..0xFF, indexed by
 * (byte - 0x80) as done by ISO8859xToUTF8(). A 0 entry would mark a byte
 * which is undefined in the character set.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2404
/*
 * Unicode to ISO-8859-2, in the layout walked by UTF8ToISO8859x():
 *  - bytes  0..31: block number for a 2 byte sequence, indexed by the
 *                  5 payload bits of the lead byte
 *  - bytes 32..47: block number for a 3 byte sequence, indexed by the
 *                  4 payload bits of the lead byte
 *  - from byte 48: blocks of 64 entries indexed by the 6 payload bits
 *                  of a trailing byte, holding either the next block
 *                  number (first trailing byte of a 3 byte sequence) or
 *                  the resulting byte, 0 meaning not in the character set
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2434
/*
 * ISO-8859-3 to Unicode: code point for each byte 0x80..0xFF, indexed by
 * (byte - 0x80) as done by ISO8859xToUTF8(). The 0x0000 entries mark the
 * bytes which are undefined in the character set.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2453
/*
 * Unicode to ISO-8859-3, in the layout walked by UTF8ToISO8859x():
 *  - bytes  0..31: block number for a 2 byte sequence, indexed by the
 *                  5 payload bits of the lead byte
 *  - bytes 32..47: block number for a 3 byte sequence, indexed by the
 *                  4 payload bits of the lead byte
 *  - from byte 48: blocks of 64 entries indexed by the 6 payload bits
 *                  of a trailing byte, holding either the next block
 *                  number (first trailing byte of a 3 byte sequence) or
 *                  the resulting byte, 0 meaning not in the character set
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2487
/*
 * ISO-8859-4 to Unicode: code point for each byte 0x80..0xFF, indexed by
 * (byte - 0x80) as done by ISO8859xToUTF8(). A 0 entry would mark a byte
 * which is undefined in the character set.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2506
/*
 * Unicode to ISO-8859-4, in the layout walked by UTF8ToISO8859x():
 *  - bytes  0..31: block number for a 2 byte sequence, indexed by the
 *                  5 payload bits of the lead byte
 *  - bytes 32..47: block number for a 3 byte sequence, indexed by the
 *                  4 payload bits of the lead byte
 *  - from byte 48: blocks of 64 entries indexed by the 6 payload bits
 *                  of a trailing byte, holding either the next block
 *                  number (first trailing byte of a 3 byte sequence) or
 *                  the resulting byte, 0 meaning not in the character set
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2536
/*
 * ISO-8859-5 to Unicode: code point for each byte 0x80..0xFF, indexed by
 * (byte - 0x80) as done by ISO8859xToUTF8(). A 0 entry would mark a byte
 * which is undefined in the character set.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2555
/*
 * Unicode to ISO-8859-5, in the layout walked by UTF8ToISO8859x():
 *  - bytes  0..31: block number for a 2 byte sequence, indexed by the
 *                  5 payload bits of the lead byte
 *  - bytes 32..47: block number for a 3 byte sequence, indexed by the
 *                  4 payload bits of the lead byte
 *  - from byte 48: blocks of 64 entries indexed by the 6 payload bits
 *                  of a trailing byte, holding either the next block
 *                  number (first trailing byte of a 3 byte sequence) or
 *                  the resulting byte, 0 meaning not in the character set
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2585
/*
 * ISO-8859-6 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f; the
 * non-zero entries after that are mostly in the 0x06xx range.
 * Many entries are 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000
 * marking a byte the charset leaves unassigned -- confirm against the
 * converter that reads this table.
 */
2586static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2587 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2588 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2589 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2590 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2591 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2592 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2593 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2594 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2595 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2596 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2597 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2598 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2599 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2600 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2601 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2602 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2603};
2604
/*
 * Byte table for ISO-8859-6, sized 48 + 5 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by five
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-6 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 * NOTE(review): the very first header byte is 0x02, and the block it
 * would select holds a single non-zero byte, 0xff, at offset 0. If the
 * layout assumption above holds, code point 0 (reachable only through
 * an overlong two-byte UTF-8 form) would translate to 0xff. This looks
 * like a generator artifact -- verify before relying on it.
 */
2605static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2606 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2607 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2613 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2614 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2615 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2617 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2619 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2621 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2623 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2624 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2625 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2626 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2629};
2630
/*
 * ISO-8859-7 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f; the
 * later entries are mostly in the 0x03xx range, with a few 0x20xx
 * values. Several entries are 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000
 * marking a byte the charset leaves unassigned -- confirm against the
 * converter that reads this table.
 */
2631static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2632 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2633 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2634 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2635 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2636 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2637 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2638 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2639 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2640 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2641 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2642 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2643 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2644 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2645 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2646 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2647 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2648};
2649
/*
 * Byte table for ISO-8859-7, sized 48 + 7 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by seven
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-7 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 * NOTE(review): the very first header byte is 0x04, and the block it
 * would select holds a single non-zero byte, 0xff, at offset 0. If the
 * layout assumption above holds, code point 0 (reachable only through
 * an overlong two-byte UTF-8 form) would translate to 0xff. This looks
 * like a generator artifact -- verify before relying on it.
 */
2650static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2651 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2653 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2656 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2657 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2658 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2659 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2660 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2661 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2662 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2663 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2665 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2666 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2667 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2668 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2670 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2672 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2673 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2674 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2675 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2676 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2677 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2678 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682};
2683
/*
 * ISO-8859-8 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f; the
 * later non-zero entries are 0x00xx values, a run of 0x05d0..0x05ea,
 * and three 0x20xx values. A large run of entries is 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000
 * marking a byte the charset leaves unassigned -- confirm against the
 * converter that reads this table.
 */
2684static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2685 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2686 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2687 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2688 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2689 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2690 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2691 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2692 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2693 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2694 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2695 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2696 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2697 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2698 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2699 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2700 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2701};
2702
/*
 * Byte table for ISO-8859-8, sized 48 + 7 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by seven
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-8 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 * NOTE(review): the very first header byte is 0x02, and the block it
 * would select holds a single non-zero byte, 0xff, at offset 0. If the
 * layout assumption above holds, code point 0 (reachable only through
 * an overlong two-byte UTF-8 form) would translate to 0xff. This looks
 * like a generator artifact -- verify before relying on it.
 */
2703static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2704 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2711 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2712 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2713 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2714 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2715 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2718 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2719 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2720 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2722 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2723 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2728 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2731 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2732 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2733 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2734 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2735};
2736
/*
 * ISO-8859-9 table of 128 16-bit Unicode values.
 * Entry i holds 0x0080 + i everywhere except six positions, which hold
 * 0x011e, 0x0130, 0x015e, 0x011f, 0x0131 and 0x015f instead.
 * NOTE(review): presumably indexed by (byte - 0x80) by the
 * ISO-8859-x to UTF-8 converter elsewhere in this file -- confirm.
 */
2737static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2738 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2739 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2740 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2741 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2742 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2743 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2744 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2745 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2746 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2747 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2748 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2749 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2750 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2751 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2752 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2753 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2754};
2755
/*
 * Byte table for ISO-8859-9, sized 48 + 5 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by five
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-9 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 */
2756static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2757 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2761 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2763 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2764 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2765 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2766 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2767 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2768 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2769 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2770 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2771 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2772 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2773 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2774 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2775 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2777 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780};
2781
/*
 * ISO-8859-10 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f; the
 * later entries mix 0x00xx and 0x01xx values plus a single 0x2015.
 * No entry is 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) by the
 * ISO-8859-x to UTF-8 converter elsewhere in this file -- confirm.
 */
2782static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2783 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2784 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2785 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2786 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2787 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2788 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2789 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2790 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2791 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2792 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2793 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2794 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2795 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2796 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2797 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2798 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2799};
2800
/*
 * Byte table for ISO-8859-10, sized 48 + 7 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by seven
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-10 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 */
2801static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2802 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2809 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2810 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2811 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2812 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2813 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2814 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2815 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2816 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2817 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2818 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2819 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2820 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2821 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2822 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2823 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2824 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2825 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2826 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2830 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2831 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2832 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2833};
2834
/*
 * ISO-8859-11 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f, then
 * 0x00a0, then values in the 0x0e01..0x0e5b range. Eight entries are
 * 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000
 * marking a byte the charset leaves unassigned -- confirm against the
 * converter that reads this table.
 */
2835static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2836 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2837 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2838 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2839 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2840 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2841 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2842 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2843 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2844 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2845 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2846 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2847 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2848 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2849 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2850 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2851 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2852};
2853
/*
 * Byte table for ISO-8859-11, sized 48 + 6 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by six
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-11 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 * NOTE(review): the very first header byte is 0x04, and the block it
 * would select holds a single non-zero byte, 0xff, at offset 0. If the
 * layout assumption above holds, code point 0 (reachable only through
 * an overlong two-byte UTF-8 form) would translate to 0xff. This looks
 * like a generator artifact -- verify before relying on it.
 */
2854static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2855 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2863 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2864 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2865 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2866 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2867 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2868 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2869 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2870 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2871 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2872 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2873 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2874 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2875 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2876 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2878 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2879 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882};
2883
/*
 * ISO-8859-13 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f; the
 * later entries mix 0x00xx and 0x01xx values plus four 0x20xx values.
 * No entry is 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) by the
 * ISO-8859-x to UTF-8 converter elsewhere in this file -- confirm.
 */
2884static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2885 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2886 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2887 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2888 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2889 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2890 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2891 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2892 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2893 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2894 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2895 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2896 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2897 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2898 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2899 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2900 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2901};
2902
/*
 * Byte table for ISO-8859-13, sized 48 + 7 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by seven
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-13 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 */
2903static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2904 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2911 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2912 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2913 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2914 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2915 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2919 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2920 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2921 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2922 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2923 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2924 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2925 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2926 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2927 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2928 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2929 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2930 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2931 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2932 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2933 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2934 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2935};
2936
/*
 * ISO-8859-14 table of 128 16-bit Unicode values.
 * The first 32 entries are the identity values 0x0080..0x009f; the
 * later entries mix 0x00xx, 0x01xx and 0x1exx values.
 * No entry is 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) by the
 * ISO-8859-x to UTF-8 converter elsewhere in this file -- confirm.
 */
2937static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2938 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2939 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2940 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2941 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2942 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2943 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2944 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2945 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2946 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2947 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2948 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2949 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2950 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2951 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2952 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2953 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2954};
2955
/*
 * Byte table for ISO-8859-14, sized 48 + 10 * 64 and initialised from
 * a single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by ten
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-14 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 */
2956static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2957 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2964 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2965 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2966 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2967 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2970 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
2972 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
2973 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
2974 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
2992 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2997 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
2998 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2999 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3000};
3001
/*
 * ISO-8859-15 table of 128 16-bit Unicode values.
 * Entry i holds 0x0080 + i everywhere except eight positions, which
 * hold 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153 and
 * 0x0178 instead.
 * NOTE(review): presumably indexed by (byte - 0x80) by the
 * ISO-8859-x to UTF-8 converter elsewhere in this file -- confirm.
 */
3002static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3003 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3004 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3005 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3006 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3007 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3008 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3009 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3010 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3011 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3012 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3013 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3014 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3015 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3016 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3017 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3018 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3019};
3020
/*
 * Byte table for ISO-8859-15, sized 48 + 6 * 64 and initialised from a
 * single string literal.
 * NOTE(review): the size suggests a 48-byte header followed by six
 * 64-byte blocks; the header bytes look like block numbers chosen from
 * UTF-8 lead-byte bits and each block like a map from a 6-bit
 * continuation value to the ISO-8859-15 byte, with 0x00 meaning "no
 * mapping" -- confirm against the UTF-8 to ISO-8859-x routine.
 */
3021static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3022 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3028 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3029 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3030 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3031 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3032 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3033 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3038 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3039 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3040 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3041 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3045 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3046 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3047 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3048 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3049};
3050
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 unsigned short values that ISO8859_16ToUTF8 passes to
 * ISO8859xToUTF8.  The rows below hold 8 entries each.
 *
 * NOTE(review): presumably entry i is the Unicode code point of the
 * ISO-8859-16 byte 0x80 + i, so row r would start at byte 0x80 + 8 * r;
 * confirm against ISO8859xToUTF8, which is not visible from here.  The
 * first 32 entries are simply 0x0080..0x009f.
 */
3051static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3052 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3053 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3054 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3055 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3056 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3057 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3058 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3059 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3060 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3061 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3062 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3063 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3064 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3065 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3066 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3067 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3068};
3069
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Lookup data that UTF8ToISO8859_16 hands to UTF8ToISO8859x.  The
 * dimension is spelled 48 + 9 * 64 and the string literal below supplies
 * exactly those 624 bytes (39 rows of 16), so the array has no room for
 * the literal's terminating NUL.
 *
 * NOTE(review): UTF8ToISO8859x is not visible from this part of the file;
 * the layout described here is inferred from the data and should be
 * confirmed against that function.  The bytes look like a 48-byte index
 * (32 slots for the low bits of a 2-byte UTF-8 lead byte, then 16 slots
 * for a 3-byte lead byte) followed by nine 64-entry blocks selected
 * through that index.  Block 0 is all zero, which presumably means "no
 * mapping"; the non-zero leaf entries are ISO-8859-16 byte values.
 * Example that is consistent with this reading: U+20AC (E2 82 AC) ends at
 * the entry 0xa4.
 */
3070static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3071 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3079 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3080 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3081 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3082 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3083 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3084 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3085 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3086 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3088 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3090 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3097 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3104 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3107 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3108 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3109 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3110};
3111
3112
3113/*
3114 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3115 */
3116
3117static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3118 const unsigned char* in, int *inlen) {
3119 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3120}
3121static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3122 const unsigned char* in, int *inlen) {
3123 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3124}
3125
3126static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3127 const unsigned char* in, int *inlen) {
3128 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3129}
3130static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3131 const unsigned char* in, int *inlen) {
3132 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3133}
3134
3135static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3136 const unsigned char* in, int *inlen) {
3137 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3138}
3139static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3140 const unsigned char* in, int *inlen) {
3141 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3142}
3143
3144static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3145 const unsigned char* in, int *inlen) {
3146 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3147}
3148static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3149 const unsigned char* in, int *inlen) {
3150 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3151}
3152
3153static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3154 const unsigned char* in, int *inlen) {
3155 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3156}
3157static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3158 const unsigned char* in, int *inlen) {
3159 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3160}
3161
3162static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3163 const unsigned char* in, int *inlen) {
3164 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3165}
3166static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3167 const unsigned char* in, int *inlen) {
3168 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3169}
3170
3171static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3172 const unsigned char* in, int *inlen) {
3173 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3174}
3175static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3176 const unsigned char* in, int *inlen) {
3177 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3178}
3179
3180static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3181 const unsigned char* in, int *inlen) {
3182 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3183}
3184static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3185 const unsigned char* in, int *inlen) {
3186 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3187}
3188
3189static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3190 const unsigned char* in, int *inlen) {
3191 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3192}
3193static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3194 const unsigned char* in, int *inlen) {
3195 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3196}
3197
3198static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3199 const unsigned char* in, int *inlen) {
3200 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3201}
3202static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3203 const unsigned char* in, int *inlen) {
3204 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3205}
3206
3207static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3208 const unsigned char* in, int *inlen) {
3209 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3210}
3211static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3212 const unsigned char* in, int *inlen) {
3213 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3214}
3215
3216static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3217 const unsigned char* in, int *inlen) {
3218 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3219}
3220static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3221 const unsigned char* in, int *inlen) {
3222 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3223}
3224
3225static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3226 const unsigned char* in, int *inlen) {
3227 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3228}
3229static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3230 const unsigned char* in, int *inlen) {
3231 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3232}
3233
3234static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3235 const unsigned char* in, int *inlen) {
3236 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3237}
3238static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3239 const unsigned char* in, int *inlen) {
3240 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3241}
3242
3243static void
3244xmlRegisterCharEncodingHandlersISO8859x (void) {
3245 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3246 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3247 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3248 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3249 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3250 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3251 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3252 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3253 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3254 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3255 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3256 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3257 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3258 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3259}
3260
3261#endif
3262#endif
3263
Daniel Veillard5d4644e2005-04-01 13:11:58 +00003264#define bottom_encoding
3265#include "elfgcchack.h"
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003266