blob: c6cfe9194e822a2b02312ee9c3ea6f737ff8f105 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Owen Taylor3473f882001-02-23 17:55:21 +000034#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000044#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000045#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046
Daniel Veillard22090732001-07-16 00:06:07 +000047static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000049
50typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52struct _xmlCharEncodingAlias {
53 const char *name;
54 const char *alias;
55};
56
57static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58static int xmlCharEncodingAliasesNb = 0;
59static int xmlCharEncodingAliasesMax = 0;
60
61#ifdef LIBXML_ICONV_ENABLED
62#if 0
63#define DEBUG_ENCODING /* Define this to get encoding traces */
64#endif
William M. Brack16db7b62003-08-07 13:12:49 +000065#else
66#ifdef LIBXML_ISO8859X_ENABLED
67static void xmlRegisterCharEncodingHandlersISO8859x (void);
68#endif
Owen Taylor3473f882001-02-23 17:55:21 +000069#endif
70
71static int xmlLittleEndian = 1;
72
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000073/**
74 * xmlEncodingErrMemory:
75 * @extra: extra informations
76 *
77 * Handle an out of memory condition
78 */
79static void
80xmlEncodingErrMemory(const char *extra)
81{
82 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
83}
84
85/**
86 * xmlErrEncoding:
87 * @error: the error number
88 * @msg: the error message
89 *
90 * n encoding error
91 */
92static void
93xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
94{
95 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96 XML_FROM_I18N, error, XML_ERR_FATAL,
97 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
98}
Daniel Veillard97ac1312001-05-30 19:14:17 +000099
100/************************************************************************
101 * *
102 * Conversions To/From UTF8 encoding *
103 * *
104 ************************************************************************/
105
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  ASCII is a strict subset of UTF-8, so every
 * accepted byte is copied unchanged; conversion stops when either the
 * input is exhausted or the output buffer is full.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or a byte outside the ASCII range (>= 0x80) is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are also updated when the failure is a non-ASCII byte, and then
 * describe the valid prefix that was converted.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    unsigned char *dst;
    unsigned char *dstend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    srcend = in + *inlen;
    dst = out;
    dstend = out + *outlen;

    /*
     * One input byte yields exactly one output byte, so the only space
     * requirement is a single free byte in @out.
     */
    while ((src < srcend) && (dst < dstend)) {
        if (*src >= 0x80) {
            /* not ASCII: report what was converted so far and fail */
            *outlen = (int) (dst - out);
            *inlen = (int) (src - in);
            return(-1);
        }
        *dst++ = *src++;
    }

    *outlen = (int) (dst - out);
    *inlen = (int) (src - in);
    return(*outlen);
}
156
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000157#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Conversion stops without error when the output buffer is full or when
 * the input ends in the middle of a multi-byte sequence; the incomplete
 * sequence is left unconsumed so the caller can retry with more data.
 * If @in is NULL this is an initialization call: nothing is produced and
 * 0 is returned.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8, or a character that does not exist in
 *     ASCII), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On a -2 return both describe the part converted before the failure.
 *
 * NOTE: over-long encodings of ASCII characters are not rejected, which
 * matches UTF8Toisolat1().
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *consumed;   /* end of the last complete char */
    unsigned char *dst;
    unsigned char *dstend;
    int status = 0;                  /* 0, or the error code to return */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = consumed = in;
    srcend = in + *inlen;
    dst = out;
    dstend = out + *outlen;

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int c;
        int trailing;

        if (lead < 0x80) {
            c = lead;
            trailing = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* continuation byte in leading position, or invalid lead */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            c = lead & 0x1F;
            trailing = 1;
        } else if (lead < 0xF0) {
            c = lead & 0x0F;
            trailing = 2;
        } else {
            c = lead & 0x07;
            trailing = 3;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (srcend - src < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte.  Fail instead of emitting
                 * the partially decoded value and swallowing this byte.
                 */
                status = -2;
                break;
            }
            c = (c << 6) | (cont & 0x3F);
        }
        if (status != 0)
            break;

        if (c >= 0x80) {
            /* no chance for this in Ascii */
            status = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = (unsigned char) c;
        consumed = src;
    }

    *outlen = (int) (dst - out);
    *inlen = (int) (consumed - in);
    return((status != 0) ? status : *outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000241#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000242
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied as is, the others are
 * expanded to a two byte sequence.  Conversion stops at the first
 * character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    unsigned char *dst;
    unsigned char *dstend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    src = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + *outlen;

    while (src < srcend) {
        unsigned char ch = *src;

        if (ch < 0x80) {
            /* plain ASCII, one byte needed */
            if (dstend - dst < 1)
                break;
            *dst++ = ch;
        } else {
            /* 0x80..0xFF, two bytes needed */
            if (dstend - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
291
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the number of bytes to copy is negative.
 * The values of *@outlen and *@inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy whichever is smaller: the room available or the data given */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
327
Daniel Veillarde72c7562002-05-31 09:47:30 +0000328
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000329#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Conversion stops without error when @out is full or when the input
 * ends inside a multi-byte sequence (the partial sequence is left
 * unconsumed).  A NULL @in is an initialization call: nothing is
 * produced and 0 is returned.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed lead or continuation byte, or a character above
 *     0xFF), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *consumed;   /* end of the last complete char */
    unsigned char *dst;
    unsigned char *dstend;
    int status = 0;                  /* 0, or the error code to return */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = consumed = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while ((status == 0) && (src < srcend)) {
        unsigned int lead = *src++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* trailing byte in leading position, or no valid lead byte */
            status = -2;
            continue;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* sequence runs past the end of the input: stop here */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }
        if (status != 0)
            continue;

        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            continue;
        }
        if (dst >= dstend)
            break;
        *dst++ = value;
        consumed = src;
    }

    *outlen = dst - out;
    *inlen = consumed - in;
    if (status != 0)
        return(status);
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000419#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000420
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time (low byte first), so the result
 * does not depend on the byte order of the host nor on the alignment
 * of @inb.  A trailing odd byte is ignored.  Conversion stops without
 * error when the next character does not fit entirely in @out, or when
 * the input ends between the two halves of a surrogate pair; in both
 * cases the character is left unconsumed.
 *
 * Returns the number of bytes written, or -1 if an argument is invalid
 *     (NULL @out, @outlen or @inlenb, or a negative length), or -2 if
 *     the transcoding fails (a high surrogate not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 * On a -2 return both describe the part converted before the failure.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *consumed;   /* end of the last complete char */
    unsigned char *dst;
    unsigned char *dstend;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    src = consumed = inb;
    srcend = inb + (*inlenb - (*inlenb % 2));   /* whole code units only */
    dst = out;
    dstend = out + *outlen;

    while (srcend - src >= 2) {
        const unsigned char *next = src + 2;
        unsigned int c = src[0] | ((unsigned int) src[1] << 8);
        int need;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            unsigned int d;

            /* low half not available yet: wait for more input */
            if (srcend - next < 2)
                break;
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (dst - out);
                *inlenb = (int) (consumed - inb);
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is now a single UCS-4 value; size its UTF-8 form */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /* never emit a truncated sequence */
        if (dstend - dst < need)
            break;

        if (need == 1) {
            *dst++ = (unsigned char) c;
        } else {
            static const unsigned char leadMark[] = { 0, 0, 0xC0, 0xE0, 0xF0 };
            int shift = 6 * (need - 1);

            *dst++ = (unsigned char) (leadMark[need] | (c >> shift));
            while (shift > 0) {
                shift -= 6;
                *dst++ = (unsigned char) (0x80 | ((c >> shift) & 0x3F));
            }
        }

        src = next;
        consumed = src;
    }

    *outlen = (int) (dst - out);
    *inlenb = (int) (consumed - inb);
    return(*outlen);
}
508
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000509#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  No Byte Order Mark is produced.
 *
 * The output is written one byte at a time (low byte first), so the
 * result does not depend on the byte order of the host nor on the
 * alignment of @outb.  Conversion stops without error when the next
 * character does not fit in @outb or when the input ends inside a
 * multi-byte sequence; the character is then left unconsumed.  A NULL
 * @in is an initialization call: nothing is produced and 0 is returned.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 * On a -2 return both describe the part converted before the failure.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *consumed;   /* end of the last complete char */
    unsigned char *dst;
    unsigned char *dstend;
    int status = 0;                  /* 0, or the error code to return */

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = consumed = in;
    srcend = in + *inlen;
    dst = outb;
    dstend = outb + (*outlen / 2) * 2;   /* whole code units only */

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int c;
        int trailing;

        if (lead < 0x80) {
            c = lead;
            trailing = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* trailing byte in leading position, or no valid lead byte */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            c = lead & 0x1F;
            trailing = 1;
        } else if (lead < 0xF0) {
            c = lead & 0x0F;
            trailing = 2;
        } else {
            c = lead & 0x07;
            trailing = 3;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (srcend - src < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                /* fail rather than emit a partially decoded value */
                status = -2;
                break;
            }
            c = (c << 6) | (cont & 0x3F);
        }
        if (status != 0)
            break;

        /* c is now a single UCS-4 value */
        if (c < 0x10000) {
            if (dstend - dst < 2)
                break;
            dst[0] = (unsigned char) (c & 0xFF);
            dst[1] = (unsigned char) (c >> 8);
            dst += 2;
        } else if (c < 0x110000) {
            unsigned int high, low;

            if (dstend - dst < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            dst[0] = (unsigned char) (high & 0xFF);
            dst[1] = (unsigned char) (high >> 8);
            dst[2] = (unsigned char) (low & 0xFF);
            dst[3] = (unsigned char) (low >> 8);
            dst += 4;
        } else {
            /* no chance for this in UTF-16 */
            status = -2;
            break;
        }
        consumed = src;
    }

    *outlen = (int) (dst - outb);
    *inlen = (int) (consumed - in);
    return((status != 0) ? status : *outlen);
}
617
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * When @in is NULL this is the initialization call: the UTF-16LE Byte
 * Order Mark (0xFF 0xFE) is written if @outb has room for it.  Otherwise
 * the conversion is delegated to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* checked here too: the BOM path below dereferences all three */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    *inlen = 0;
    if (*outlen < 2) {
        /* no room for the BOM */
        *outlen = 0;
        return(0);
    }
    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
William M. Brack030a7a12004-02-10 12:48:57 +0000656#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000657
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time (high byte first), so the result
 * does not depend on the byte order of the host nor on the alignment
 * of @inb.  A trailing odd byte is ignored.  Conversion stops without
 * error when the next character does not fit entirely in @out, or when
 * the input ends between the two halves of a surrogate pair (same
 * behaviour as UTF16LEToUTF8()); in both cases the character is left
 * unconsumed so that no truncated UTF-8 sequence is ever produced.
 *
 * Returns the number of bytes written, or -1 if an argument is invalid
 *     (NULL @out, @outlen or @inlenb, or a negative length), or -2 if
 *     the transcoding fails (a high surrogate not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 * On a -2 return both describe the part converted before the failure.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *consumed;   /* end of the last complete char */
    unsigned char *dst;
    unsigned char *dstend;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    src = consumed = inb;
    srcend = inb + (*inlenb - (*inlenb % 2));   /* whole code units only */
    dst = out;
    dstend = out + *outlen;

    while (srcend - src >= 2) {
        const unsigned char *next = src + 2;
        unsigned int c = ((unsigned int) src[0] << 8) | src[1];
        int need;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            unsigned int d;

            /* low half not available yet: wait for more input */
            if (srcend - next < 2)
                break;
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (dst - out);
                *inlenb = (int) (consumed - inb);
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is now a single UCS-4 value; size its UTF-8 form */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /* never emit a truncated sequence */
        if (dstend - dst < need)
            break;

        if (need == 1) {
            *dst++ = (unsigned char) c;
        } else {
            static const unsigned char leadMark[] = { 0, 0, 0xC0, 0xE0, 0xF0 };
            int shift = 6 * (need - 1);

            *dst++ = (unsigned char) (leadMark[need] | (c >> shift));
            while (shift > 0) {
                shift -= 6;
                *dst++ = (unsigned char) (0x80 | ((c >> shift) & 0x3F));
            }
        }

        src = next;
        consumed = src;
    }

    *outlen = (int) (dst - out);
    *inlenb = (int) (consumed - inb);
    return(*outlen);
}
749
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000750#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  No Byte Order Mark is produced.
 *
 * The output is written one byte at a time (high byte first), so the
 * result does not depend on the byte order of the host nor on the
 * alignment of @outb.  Conversion stops without error when the next
 * character does not fit in @outb or when the input ends inside a
 * multi-byte sequence; the character is then left unconsumed.  A NULL
 * @in is an initialization call: nothing is produced and 0 is returned.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced
 * (always a byte count, including on the -2 paths).
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *consumed;   /* end of the last complete char */
    unsigned char *dst;
    unsigned char *dstend;
    int status = 0;                  /* 0, or the error code to return */

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = consumed = in;
    srcend = in + *inlen;
    dst = outb;
    dstend = outb + (*outlen / 2) * 2;   /* whole code units only */

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int c;
        int trailing;

        if (lead < 0x80) {
            c = lead;
            trailing = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* trailing byte in leading position, or no valid lead byte */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            c = lead & 0x1F;
            trailing = 1;
        } else if (lead < 0xF0) {
            c = lead & 0x0F;
            trailing = 2;
        } else {
            c = lead & 0x07;
            trailing = 3;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (srcend - src < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                /* fail rather than emit a partially decoded value */
                status = -2;
                break;
            }
            c = (c << 6) | (cont & 0x3F);
        }
        if (status != 0)
            break;

        /* c is now a single UCS-4 value */
        if (c < 0x10000) {
            if (dstend - dst < 2)
                break;
            dst[0] = (unsigned char) (c >> 8);
            dst[1] = (unsigned char) (c & 0xFF);
            dst += 2;
        } else if (c < 0x110000) {
            unsigned int high, low;

            if (dstend - dst < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            dst[0] = (unsigned char) (high >> 8);
            dst[1] = (unsigned char) (high & 0xFF);
            dst[2] = (unsigned char) (low >> 8);
            dst[3] = (unsigned char) (low & 0xFF);
            dst += 4;
        } else {
            /* no chance for this in UTF-16 */
            status = -2;
            break;
        }
        consumed = src;
    }

    *outlen = (int) (dst - outb);
    *inlen = (int) (consumed - in);
    return((status != 0) ? status : *outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000855#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000856
Daniel Veillard97ac1312001-05-30 19:14:17 +0000857/************************************************************************
858 * *
859 * Generic encoding handling routines *
860 * *
861 ************************************************************************/
862
Owen Taylor3473f882001-02-23 17:55:21 +0000863/**
864 * xmlDetectCharEncoding:
865 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000866 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000867 * @len: pointer to the length of the buffer
868 *
869 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000870 * according to the non-normative appendix F of the XML-1.0 recommendation.
Owen Taylor3473f882001-02-23 17:55:21 +0000871 *
872 * Returns one of the XML_CHAR_ENCODING_... values.
873 */
874xmlCharEncoding
875xmlDetectCharEncoding(const unsigned char* in, int len)
876{
Daniel Veillardce682bc2004-11-05 17:22:25 +0000877 if (in == NULL)
878 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000879 if (len >= 4) {
880 if ((in[0] == 0x00) && (in[1] == 0x00) &&
881 (in[2] == 0x00) && (in[3] == 0x3C))
882 return(XML_CHAR_ENCODING_UCS4BE);
883 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
884 (in[2] == 0x00) && (in[3] == 0x00))
885 return(XML_CHAR_ENCODING_UCS4LE);
886 if ((in[0] == 0x00) && (in[1] == 0x00) &&
887 (in[2] == 0x3C) && (in[3] == 0x00))
888 return(XML_CHAR_ENCODING_UCS4_2143);
889 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
890 (in[2] == 0x00) && (in[3] == 0x00))
891 return(XML_CHAR_ENCODING_UCS4_3412);
892 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
893 (in[2] == 0xA7) && (in[3] == 0x94))
894 return(XML_CHAR_ENCODING_EBCDIC);
895 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
896 (in[2] == 0x78) && (in[3] == 0x6D))
897 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000898 /*
899 * Although not part of the recommendation, we also
900 * attempt an "auto-recognition" of UTF-16LE and
901 * UTF-16BE encodings.
902 */
903 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
904 (in[2] == 0x3F) && (in[3] == 0x00))
905 return(XML_CHAR_ENCODING_UTF16LE);
906 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
907 (in[2] == 0x00) && (in[3] == 0x3F))
908 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000909 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000910 if (len >= 3) {
911 /*
912 * Errata on XML-1.0 June 20 2001
913 * We now allow an UTF8 encoded BOM
914 */
915 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
916 (in[2] == 0xBF))
917 return(XML_CHAR_ENCODING_UTF8);
918 }
William M. Brackf9415e42003-11-28 09:39:10 +0000919 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000920 if (len >= 2) {
921 if ((in[0] == 0xFE) && (in[1] == 0xFF))
922 return(XML_CHAR_ENCODING_UTF16BE);
923 if ((in[0] == 0xFF) && (in[1] == 0xFE))
924 return(XML_CHAR_ENCODING_UTF16LE);
925 }
926 return(XML_CHAR_ENCODING_NONE);
927}
928
929/**
930 * xmlCleanupEncodingAliases:
931 *
932 * Unregisters all aliases
933 */
934void
935xmlCleanupEncodingAliases(void) {
936 int i;
937
938 if (xmlCharEncodingAliases == NULL)
939 return;
940
941 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
942 if (xmlCharEncodingAliases[i].name != NULL)
943 xmlFree((char *) xmlCharEncodingAliases[i].name);
944 if (xmlCharEncodingAliases[i].alias != NULL)
945 xmlFree((char *) xmlCharEncodingAliases[i].alias);
946 }
947 xmlCharEncodingAliasesNb = 0;
948 xmlCharEncodingAliasesMax = 0;
949 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000950 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000951}
952
953/**
954 * xmlGetEncodingAlias:
955 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
956 *
957 * Lookup an encoding name for the given alias.
958 *
William M. Brackf9415e42003-11-28 09:39:10 +0000959 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +0000960 */
961const char *
962xmlGetEncodingAlias(const char *alias) {
963 int i;
964 char upper[100];
965
966 if (alias == NULL)
967 return(NULL);
968
969 if (xmlCharEncodingAliases == NULL)
970 return(NULL);
971
972 for (i = 0;i < 99;i++) {
973 upper[i] = toupper(alias[i]);
974 if (upper[i] == 0) break;
975 }
976 upper[i] = 0;
977
978 /*
979 * Walk down the list looking for a definition of the alias
980 */
981 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
982 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
983 return(xmlCharEncodingAliases[i].name);
984 }
985 }
986 return(NULL);
987}
988
989/**
990 * xmlAddEncodingAlias:
991 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
992 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
993 *
William M. Brackf9415e42003-11-28 09:39:10 +0000994 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +0000995 * will be overwritten.
996 *
997 * Returns 0 in case of success, -1 in case of error
998 */
999int
1000xmlAddEncodingAlias(const char *name, const char *alias) {
1001 int i;
1002 char upper[100];
1003
1004 if ((name == NULL) || (alias == NULL))
1005 return(-1);
1006
1007 for (i = 0;i < 99;i++) {
1008 upper[i] = toupper(alias[i]);
1009 if (upper[i] == 0) break;
1010 }
1011 upper[i] = 0;
1012
1013 if (xmlCharEncodingAliases == NULL) {
1014 xmlCharEncodingAliasesNb = 0;
1015 xmlCharEncodingAliasesMax = 20;
1016 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1017 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1018 if (xmlCharEncodingAliases == NULL)
1019 return(-1);
1020 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1021 xmlCharEncodingAliasesMax *= 2;
1022 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1023 xmlRealloc(xmlCharEncodingAliases,
1024 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1025 }
1026 /*
1027 * Walk down the list looking for a definition of the alias
1028 */
1029 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1030 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1031 /*
1032 * Replace the definition.
1033 */
1034 xmlFree((char *) xmlCharEncodingAliases[i].name);
1035 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1036 return(0);
1037 }
1038 }
1039 /*
1040 * Add the definition
1041 */
1042 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1043 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1044 xmlCharEncodingAliasesNb++;
1045 return(0);
1046}
1047
1048/**
1049 * xmlDelEncodingAlias:
1050 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1051 *
1052 * Unregisters an encoding alias @alias
1053 *
1054 * Returns 0 in case of success, -1 in case of error
1055 */
1056int
1057xmlDelEncodingAlias(const char *alias) {
1058 int i;
1059
1060 if (alias == NULL)
1061 return(-1);
1062
1063 if (xmlCharEncodingAliases == NULL)
1064 return(-1);
1065 /*
1066 * Walk down the list looking for a definition of the alias
1067 */
1068 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1069 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1070 xmlFree((char *) xmlCharEncodingAliases[i].name);
1071 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1072 xmlCharEncodingAliasesNb--;
1073 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1074 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1075 return(0);
1076 }
1077 }
1078 return(-1);
1079}
1080
1081/**
1082 * xmlParseCharEncoding:
1083 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1084 *
William M. Brackf9415e42003-11-28 09:39:10 +00001085 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * that the comparison is case insensitive accordingly to the section
1087 * [XML] 4.3.3 Character Encoding in Entities.
1088 *
1089 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1090 * if not recognized.
1091 */
1092xmlCharEncoding
1093xmlParseCharEncoding(const char* name)
1094{
1095 const char *alias;
1096 char upper[500];
1097 int i;
1098
1099 if (name == NULL)
1100 return(XML_CHAR_ENCODING_NONE);
1101
1102 /*
1103 * Do the alias resolution
1104 */
1105 alias = xmlGetEncodingAlias(name);
1106 if (alias != NULL)
1107 name = alias;
1108
1109 for (i = 0;i < 499;i++) {
1110 upper[i] = toupper(name[i]);
1111 if (upper[i] == 0) break;
1112 }
1113 upper[i] = 0;
1114
1115 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1116 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1117 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1118
1119 /*
1120 * NOTE: if we were able to parse this, the endianness of UTF16 is
1121 * already found and in use
1122 */
1123 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1124 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1125
1126 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1127 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1128 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1129
1130 /*
1131 * NOTE: if we were able to parse this, the endianness of UCS4 is
1132 * already found and in use
1133 */
1134 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1135 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1136 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1137
1138
1139 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1140 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1141 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1142
1143 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1144 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1145 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1146
1147 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1148 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1149 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1150 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1151 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1152 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1153 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1154
1155 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1156 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1157 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1158
1159#ifdef DEBUG_ENCODING
1160 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1161#endif
1162 return(XML_CHAR_ENCODING_ERROR);
1163}
1164
1165/**
1166 * xmlGetCharEncodingName:
1167 * @enc: the encoding
1168 *
1169 * The "canonical" name for XML encoding.
1170 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1171 * Section 4.3.3 Character Encoding in Entities
1172 *
1173 * Returns the canonical name for the given encoding
1174 */
1175
1176const char*
1177xmlGetCharEncodingName(xmlCharEncoding enc) {
1178 switch (enc) {
1179 case XML_CHAR_ENCODING_ERROR:
1180 return(NULL);
1181 case XML_CHAR_ENCODING_NONE:
1182 return(NULL);
1183 case XML_CHAR_ENCODING_UTF8:
1184 return("UTF-8");
1185 case XML_CHAR_ENCODING_UTF16LE:
1186 return("UTF-16");
1187 case XML_CHAR_ENCODING_UTF16BE:
1188 return("UTF-16");
1189 case XML_CHAR_ENCODING_EBCDIC:
1190 return("EBCDIC");
1191 case XML_CHAR_ENCODING_UCS4LE:
1192 return("ISO-10646-UCS-4");
1193 case XML_CHAR_ENCODING_UCS4BE:
1194 return("ISO-10646-UCS-4");
1195 case XML_CHAR_ENCODING_UCS4_2143:
1196 return("ISO-10646-UCS-4");
1197 case XML_CHAR_ENCODING_UCS4_3412:
1198 return("ISO-10646-UCS-4");
1199 case XML_CHAR_ENCODING_UCS2:
1200 return("ISO-10646-UCS-2");
1201 case XML_CHAR_ENCODING_8859_1:
1202 return("ISO-8859-1");
1203 case XML_CHAR_ENCODING_8859_2:
1204 return("ISO-8859-2");
1205 case XML_CHAR_ENCODING_8859_3:
1206 return("ISO-8859-3");
1207 case XML_CHAR_ENCODING_8859_4:
1208 return("ISO-8859-4");
1209 case XML_CHAR_ENCODING_8859_5:
1210 return("ISO-8859-5");
1211 case XML_CHAR_ENCODING_8859_6:
1212 return("ISO-8859-6");
1213 case XML_CHAR_ENCODING_8859_7:
1214 return("ISO-8859-7");
1215 case XML_CHAR_ENCODING_8859_8:
1216 return("ISO-8859-8");
1217 case XML_CHAR_ENCODING_8859_9:
1218 return("ISO-8859-9");
1219 case XML_CHAR_ENCODING_2022_JP:
1220 return("ISO-2022-JP");
1221 case XML_CHAR_ENCODING_SHIFT_JIS:
1222 return("Shift-JIS");
1223 case XML_CHAR_ENCODING_EUC_JP:
1224 return("EUC-JP");
1225 case XML_CHAR_ENCODING_ASCII:
1226 return(NULL);
1227 }
1228 return(NULL);
1229}
1230
Daniel Veillard97ac1312001-05-30 19:14:17 +00001231/************************************************************************
1232 * *
1233 * Char encoding handlers *
1234 * *
1235 ************************************************************************/
1236
Owen Taylor3473f882001-02-23 17:55:21 +00001237
/*
 * Table of the registered char encoding handlers.
 * It is allocated by xmlInitCharEncodingHandlers() with room for
 * MAX_ENCODING_HANDLERS pointers; nbCharEncodingHandler is the number
 * of slots currently in use.
 */
/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1249
1250/**
1251 * xmlNewCharEncodingHandler:
1252 * @name: the encoding name, in UTF-8 format (ASCII actually)
1253 * @input: the xmlCharEncodingInputFunc to read that encoding
1254 * @output: the xmlCharEncodingOutputFunc to write that encoding
1255 *
1256 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001257 *
Owen Taylor3473f882001-02-23 17:55:21 +00001258 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1259 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001260xmlCharEncodingHandlerPtr
Owen Taylor3473f882001-02-23 17:55:21 +00001261xmlNewCharEncodingHandler(const char *name,
1262 xmlCharEncodingInputFunc input,
1263 xmlCharEncodingOutputFunc output) {
1264 xmlCharEncodingHandlerPtr handler;
1265 const char *alias;
1266 char upper[500];
1267 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001268 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001269
1270 /*
1271 * Do the alias resolution
1272 */
1273 alias = xmlGetEncodingAlias(name);
1274 if (alias != NULL)
1275 name = alias;
1276
1277 /*
1278 * Keep only the uppercase version of the encoding.
1279 */
1280 if (name == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001281 xmlEncodingErr(XML_I18N_NO_NAME,
1282 "xmlNewCharEncodingHandler : no name !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001283 return(NULL);
1284 }
1285 for (i = 0;i < 499;i++) {
1286 upper[i] = toupper(name[i]);
1287 if (upper[i] == 0) break;
1288 }
1289 upper[i] = 0;
1290 up = xmlMemStrdup(upper);
1291 if (up == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001292 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001293 return(NULL);
1294 }
1295
1296 /*
1297 * allocate and fill-up an handler block.
1298 */
1299 handler = (xmlCharEncodingHandlerPtr)
1300 xmlMalloc(sizeof(xmlCharEncodingHandler));
1301 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001302 xmlFree(up);
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001303 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001304 return(NULL);
1305 }
1306 handler->input = input;
1307 handler->output = output;
1308 handler->name = up;
1309
1310#ifdef LIBXML_ICONV_ENABLED
1311 handler->iconv_in = NULL;
1312 handler->iconv_out = NULL;
1313#endif /* LIBXML_ICONV_ENABLED */
1314
1315 /*
1316 * registers and returns the handler.
1317 */
1318 xmlRegisterCharEncodingHandler(handler);
1319#ifdef DEBUG_ENCODING
1320 xmlGenericError(xmlGenericErrorContext,
1321 "Registered encoding handler for %s\n", name);
1322#endif
1323 return(handler);
1324}
1325
1326/**
1327 * xmlInitCharEncodingHandlers:
1328 *
1329 * Initialize the char encoding support, it registers the default
1330 * encoding supported.
1331 * NOTE: while public, this function usually doesn't need to be called
1332 * in normal processing.
1333 */
1334void
1335xmlInitCharEncodingHandlers(void) {
1336 unsigned short int tst = 0x1234;
1337 unsigned char *ptr = (unsigned char *) &tst;
1338
1339 if (handlers != NULL) return;
1340
1341 handlers = (xmlCharEncodingHandlerPtr *)
1342 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1343
1344 if (*ptr == 0x12) xmlLittleEndian = 0;
1345 else if (*ptr == 0x34) xmlLittleEndian = 1;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001346 else {
1347 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1348 "Odd problem at endianness detection\n", NULL);
1349 }
Owen Taylor3473f882001-02-23 17:55:21 +00001350
1351 if (handlers == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001352 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001353 return;
1354 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001355 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001356#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00001357 xmlUTF16LEHandler =
1358 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1359 xmlUTF16BEHandler =
1360 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001361 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001362 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1363 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001364 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001365#ifdef LIBXML_HTML_ENABLED
1366 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1367#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001368#else
1369 xmlUTF16LEHandler =
1370 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1371 xmlUTF16BEHandler =
1372 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001373 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001374 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1375 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1376 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1377#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001378#ifndef LIBXML_ICONV_ENABLED
1379#ifdef LIBXML_ISO8859X_ENABLED
1380 xmlRegisterCharEncodingHandlersISO8859x ();
1381#endif
1382#endif
1383
Owen Taylor3473f882001-02-23 17:55:21 +00001384}
1385
1386/**
1387 * xmlCleanupCharEncodingHandlers:
1388 *
1389 * Cleanup the memory allocated for the char encoding support, it
1390 * unregisters all the encoding handlers and the aliases.
1391 */
1392void
1393xmlCleanupCharEncodingHandlers(void) {
1394 xmlCleanupEncodingAliases();
1395
1396 if (handlers == NULL) return;
1397
1398 for (;nbCharEncodingHandler > 0;) {
1399 nbCharEncodingHandler--;
1400 if (handlers[nbCharEncodingHandler] != NULL) {
1401 if (handlers[nbCharEncodingHandler]->name != NULL)
1402 xmlFree(handlers[nbCharEncodingHandler]->name);
1403 xmlFree(handlers[nbCharEncodingHandler]);
1404 }
1405 }
1406 xmlFree(handlers);
1407 handlers = NULL;
1408 nbCharEncodingHandler = 0;
1409 xmlDefaultCharEncodingHandler = NULL;
1410}
1411
1412/**
1413 * xmlRegisterCharEncodingHandler:
1414 * @handler: the xmlCharEncodingHandlerPtr handler block
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418void
1419xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1420 if (handlers == NULL) xmlInitCharEncodingHandlers();
1421 if (handler == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001422 xmlEncodingErr(XML_I18N_NO_HANDLER,
1423 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001424 return;
1425 }
1426
1427 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001428 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1429 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1430 "MAX_ENCODING_HANDLERS");
Owen Taylor3473f882001-02-23 17:55:21 +00001431 return;
1432 }
1433 handlers[nbCharEncodingHandler++] = handler;
1434}
1435
1436/**
1437 * xmlGetCharEncodingHandler:
1438 * @enc: an xmlCharEncoding value.
1439 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001440 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001441 *
1442 * Returns the handler or NULL if not found
1443 */
1444xmlCharEncodingHandlerPtr
1445xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1446 xmlCharEncodingHandlerPtr handler;
1447
1448 if (handlers == NULL) xmlInitCharEncodingHandlers();
1449 switch (enc) {
1450 case XML_CHAR_ENCODING_ERROR:
1451 return(NULL);
1452 case XML_CHAR_ENCODING_NONE:
1453 return(NULL);
1454 case XML_CHAR_ENCODING_UTF8:
1455 return(NULL);
1456 case XML_CHAR_ENCODING_UTF16LE:
1457 return(xmlUTF16LEHandler);
1458 case XML_CHAR_ENCODING_UTF16BE:
1459 return(xmlUTF16BEHandler);
1460 case XML_CHAR_ENCODING_EBCDIC:
1461 handler = xmlFindCharEncodingHandler("EBCDIC");
1462 if (handler != NULL) return(handler);
1463 handler = xmlFindCharEncodingHandler("ebcdic");
1464 if (handler != NULL) return(handler);
1465 break;
1466 case XML_CHAR_ENCODING_UCS4BE:
1467 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1468 if (handler != NULL) return(handler);
1469 handler = xmlFindCharEncodingHandler("UCS-4");
1470 if (handler != NULL) return(handler);
1471 handler = xmlFindCharEncodingHandler("UCS4");
1472 if (handler != NULL) return(handler);
1473 break;
1474 case XML_CHAR_ENCODING_UCS4LE:
1475 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1476 if (handler != NULL) return(handler);
1477 handler = xmlFindCharEncodingHandler("UCS-4");
1478 if (handler != NULL) return(handler);
1479 handler = xmlFindCharEncodingHandler("UCS4");
1480 if (handler != NULL) return(handler);
1481 break;
1482 case XML_CHAR_ENCODING_UCS4_2143:
1483 break;
1484 case XML_CHAR_ENCODING_UCS4_3412:
1485 break;
1486 case XML_CHAR_ENCODING_UCS2:
1487 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1488 if (handler != NULL) return(handler);
1489 handler = xmlFindCharEncodingHandler("UCS-2");
1490 if (handler != NULL) return(handler);
1491 handler = xmlFindCharEncodingHandler("UCS2");
1492 if (handler != NULL) return(handler);
1493 break;
1494
1495 /*
1496 * We used to keep ISO Latin encodings native in the
1497 * generated data. This led to so many problems that
1498 * this has been removed. One can still change this
1499 * back by registering no-ops encoders for those
1500 */
1501 case XML_CHAR_ENCODING_8859_1:
1502 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1503 if (handler != NULL) return(handler);
1504 break;
1505 case XML_CHAR_ENCODING_8859_2:
1506 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1507 if (handler != NULL) return(handler);
1508 break;
1509 case XML_CHAR_ENCODING_8859_3:
1510 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1511 if (handler != NULL) return(handler);
1512 break;
1513 case XML_CHAR_ENCODING_8859_4:
1514 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1515 if (handler != NULL) return(handler);
1516 break;
1517 case XML_CHAR_ENCODING_8859_5:
1518 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1519 if (handler != NULL) return(handler);
1520 break;
1521 case XML_CHAR_ENCODING_8859_6:
1522 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1523 if (handler != NULL) return(handler);
1524 break;
1525 case XML_CHAR_ENCODING_8859_7:
1526 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1527 if (handler != NULL) return(handler);
1528 break;
1529 case XML_CHAR_ENCODING_8859_8:
1530 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1531 if (handler != NULL) return(handler);
1532 break;
1533 case XML_CHAR_ENCODING_8859_9:
1534 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1535 if (handler != NULL) return(handler);
1536 break;
1537
1538
1539 case XML_CHAR_ENCODING_2022_JP:
1540 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1541 if (handler != NULL) return(handler);
1542 break;
1543 case XML_CHAR_ENCODING_SHIFT_JIS:
1544 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1545 if (handler != NULL) return(handler);
1546 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1547 if (handler != NULL) return(handler);
1548 handler = xmlFindCharEncodingHandler("Shift_JIS");
1549 if (handler != NULL) return(handler);
1550 break;
1551 case XML_CHAR_ENCODING_EUC_JP:
1552 handler = xmlFindCharEncodingHandler("EUC-JP");
1553 if (handler != NULL) return(handler);
1554 break;
1555 default:
1556 break;
1557 }
1558
1559#ifdef DEBUG_ENCODING
1560 xmlGenericError(xmlGenericErrorContext,
1561 "No handler found for encoding %d\n", enc);
1562#endif
1563 return(NULL);
1564}
1565
1566/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001567 * xmlFindCharEncodingHandler:
1568 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001569 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001570 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001571 *
1572 * Returns the handler or NULL if not found
1573 */
1574xmlCharEncodingHandlerPtr
1575xmlFindCharEncodingHandler(const char *name) {
1576 const char *nalias;
1577 const char *norig;
1578 xmlCharEncoding alias;
1579#ifdef LIBXML_ICONV_ENABLED
1580 xmlCharEncodingHandlerPtr enc;
1581 iconv_t icv_in, icv_out;
1582#endif /* LIBXML_ICONV_ENABLED */
1583 char upper[100];
1584 int i;
1585
1586 if (handlers == NULL) xmlInitCharEncodingHandlers();
1587 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1588 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1589
1590 /*
1591 * Do the alias resolution
1592 */
1593 norig = name;
1594 nalias = xmlGetEncodingAlias(name);
1595 if (nalias != NULL)
1596 name = nalias;
1597
1598 /*
1599 * Check first for directly registered encoding names
1600 */
1601 for (i = 0;i < 99;i++) {
1602 upper[i] = toupper(name[i]);
1603 if (upper[i] == 0) break;
1604 }
1605 upper[i] = 0;
1606
1607 for (i = 0;i < nbCharEncodingHandler; i++)
1608 if (!strcmp(upper, handlers[i]->name)) {
1609#ifdef DEBUG_ENCODING
1610 xmlGenericError(xmlGenericErrorContext,
1611 "Found registered handler for encoding %s\n", name);
1612#endif
1613 return(handlers[i]);
1614 }
1615
1616#ifdef LIBXML_ICONV_ENABLED
1617 /* check whether iconv can handle this */
1618 icv_in = iconv_open("UTF-8", name);
1619 icv_out = iconv_open(name, "UTF-8");
1620 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1621 enc = (xmlCharEncodingHandlerPtr)
1622 xmlMalloc(sizeof(xmlCharEncodingHandler));
1623 if (enc == NULL) {
1624 iconv_close(icv_in);
1625 iconv_close(icv_out);
1626 return(NULL);
1627 }
1628 enc->name = xmlMemStrdup(name);
1629 enc->input = NULL;
1630 enc->output = NULL;
1631 enc->iconv_in = icv_in;
1632 enc->iconv_out = icv_out;
1633#ifdef DEBUG_ENCODING
1634 xmlGenericError(xmlGenericErrorContext,
1635 "Found iconv handler for encoding %s\n", name);
1636#endif
1637 return enc;
1638 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001639 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00001640 "iconv : problems with filters for '%s'\n", name);
1641 }
1642#endif /* LIBXML_ICONV_ENABLED */
1643
1644#ifdef DEBUG_ENCODING
1645 xmlGenericError(xmlGenericErrorContext,
1646 "No handler found for encoding %s\n", name);
1647#endif
1648
1649 /*
1650 * Fallback using the canonical names
1651 */
1652 alias = xmlParseCharEncoding(norig);
1653 if (alias != XML_CHAR_ENCODING_ERROR) {
1654 const char* canon;
1655 canon = xmlGetCharEncodingName(alias);
1656 if ((canon != NULL) && (strcmp(name, canon))) {
1657 return(xmlFindCharEncodingHandler(canon));
1658 }
1659 }
1660
William M. Brackf9415e42003-11-28 09:39:10 +00001661 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001662 return(NULL);
1663}
1664
Daniel Veillard97ac1312001-05-30 19:14:17 +00001665/************************************************************************
1666 * *
1667 * ICONV based generic conversion functions *
1668 * *
1669 ************************************************************************/
1670
Owen Taylor3473f882001-02-23 17:55:21 +00001671#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Run a single iconv() conversion from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or if one of the pointer arguments is NULL
 *        (in which case *@outlen is set to 0 when @outlen is provided)
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char, or for
 *        any other iconv failure.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv leaves in the lengths what was not consumed / not filled */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
1734#endif /* LIBXML_ICONV_ENABLED */
1735
Daniel Veillard97ac1312001-05-30 19:14:17 +00001736/************************************************************************
1737 * *
1738 * The real API used by libxml for on-the-fly conversion *
1739 * *
1740 ************************************************************************/
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742/**
1743 * xmlCharEncFirstLine:
1744 * @handler: char enconding transformation data structure
1745 * @out: an xmlBuffer for the output.
1746 * @in: an xmlBuffer for the input
1747 *
1748 * Front-end for the encoding handler input function, but handle only
1749 * the very first line, i.e. limit itself to 45 chars.
1750 *
1751 * Returns the number of byte written if success, or
1752 * -1 general error
1753 * -2 if the transcoding fails (for *in is not valid utf8 string or
1754 * the result of transformation can't fit into the encoding we want), or
1755 */
1756int
1757xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1758 xmlBufferPtr in) {
1759 int ret = -2;
1760 int written;
1761 int toconv;
1762
1763 if (handler == NULL) return(-1);
1764 if (out == NULL) return(-1);
1765 if (in == NULL) return(-1);
1766
1767 written = out->size - out->use;
1768 toconv = in->use;
1769 if (toconv * 2 >= written) {
1770 xmlBufferGrow(out, toconv);
1771 written = out->size - out->use - 1;
1772 }
1773
1774 /*
1775 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1776 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001777 * declaration without going too far inside the document content.
Owen Taylor3473f882001-02-23 17:55:21 +00001778 */
1779 written = 45;
1780
1781 if (handler->input != NULL) {
1782 ret = handler->input(&out->content[out->use], &written,
1783 in->content, &toconv);
1784 xmlBufferShrink(in, toconv);
1785 out->use += written;
1786 out->content[out->use] = 0;
1787 }
1788#ifdef LIBXML_ICONV_ENABLED
1789 else if (handler->iconv_in != NULL) {
1790 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1791 &written, in->content, &toconv);
1792 xmlBufferShrink(in, toconv);
1793 out->use += written;
1794 out->content[out->use] = 0;
1795 if (ret == -1) ret = -3;
1796 }
1797#endif /* LIBXML_ICONV_ENABLED */
1798#ifdef DEBUG_ENCODING
1799 switch (ret) {
1800 case 0:
1801 xmlGenericError(xmlGenericErrorContext,
1802 "converted %d bytes to %d bytes of input\n",
1803 toconv, written);
1804 break;
1805 case -1:
1806 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1807 toconv, written, in->use);
1808 break;
1809 case -2:
1810 xmlGenericError(xmlGenericErrorContext,
1811 "input conversion failed due to input error\n");
1812 break;
1813 case -3:
1814 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1815 toconv, written, in->use);
1816 break;
1817 default:
1818 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1819 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001820#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00001821 /*
1822 * Ignore when input buffer is not on a boundary
1823 */
1824 if (ret == -3) ret = 0;
1825 if (ret == -1) ret = 0;
1826 return(ret);
1827}
1828
1829/**
1830 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001831 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00001832 * @out: an xmlBuffer for the output.
1833 * @in: an xmlBuffer for the input
1834 *
1835 * Generic front-end for the encoding handler input function
1836 *
1837 * Returns the number of byte written if success, or
1838 * -1 general error
1839 * -2 if the transcoding fails (for *in is not valid utf8 string or
1840 * the result of transformation can't fit into the encoding we want), or
1841 */
1842int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001843xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1844 xmlBufferPtr in)
1845{
Owen Taylor3473f882001-02-23 17:55:21 +00001846 int ret = -2;
1847 int written;
1848 int toconv;
1849
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001850 if (handler == NULL)
1851 return (-1);
1852 if (out == NULL)
1853 return (-1);
1854 if (in == NULL)
1855 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001856
1857 toconv = in->use;
1858 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001859 return (0);
Owen Taylor3473f882001-02-23 17:55:21 +00001860 written = out->size - out->use;
1861 if (toconv * 2 >= written) {
1862 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001863 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001864 }
1865 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001866 ret = handler->input(&out->content[out->use], &written,
1867 in->content, &toconv);
1868 xmlBufferShrink(in, toconv);
1869 out->use += written;
1870 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001871 }
1872#ifdef LIBXML_ICONV_ENABLED
1873 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001874 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1875 &written, in->content, &toconv);
1876 xmlBufferShrink(in, toconv);
1877 out->use += written;
1878 out->content[out->use] = 0;
1879 if (ret == -1)
1880 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00001881 }
1882#endif /* LIBXML_ICONV_ENABLED */
1883 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001885#ifdef DEBUG_ENCODING
1886 xmlGenericError(xmlGenericErrorContext,
1887 "converted %d bytes to %d bytes of input\n",
1888 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00001889#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001890 break;
1891 case -1:
1892#ifdef DEBUG_ENCODING
1893 xmlGenericError(xmlGenericErrorContext,
1894 "converted %d bytes to %d bytes of input, %d left\n",
1895 toconv, written, in->use);
1896#endif
1897 break;
1898 case -3:
1899#ifdef DEBUG_ENCODING
1900 xmlGenericError(xmlGenericErrorContext,
1901 "converted %d bytes to %d bytes of input, %d left\n",
1902 toconv, written, in->use);
1903#endif
1904 break;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001905 case -2: {
1906 char buf[50];
1907
Daniel Veillard2e7598c2005-09-02 12:28:34 +00001908 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001909 in->content[0], in->content[1],
1910 in->content[2], in->content[3]);
1911 buf[49] = 0;
1912 xmlEncodingErr(XML_I18N_CONV_FAILED,
1913 "input conversion failed due to input error, bytes %s\n",
1914 buf);
1915 }
Owen Taylor3473f882001-02-23 17:55:21 +00001916 }
1917 /*
1918 * Ignore when input buffer is not on a boundary
1919 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00001920 if (ret == -3)
1921 ret = 0;
Daniel Veillard2644ab22005-08-24 14:22:55 +00001922 return (written? written : ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001923}
1924
1925/**
1926 * xmlCharEncOutFunc:
1927 * @handler: char enconding transformation data structure
1928 * @out: an xmlBuffer for the output.
1929 * @in: an xmlBuffer for the input
1930 *
1931 * Generic front-end for the encoding handler output function
1932 * a first call with @in == NULL has to be made firs to initiate the
1933 * output in case of non-stateless encoding needing to initiate their
1934 * state or the output (like the BOM in UTF16).
1935 * In case of UTF8 sequence conversion errors for the given encoder,
1936 * the content will be automatically remapped to a CharRef sequence.
1937 *
1938 * Returns the number of byte written if success, or
1939 * -1 general error
1940 * -2 if the transcoding fails (for *in is not valid utf8 string or
1941 * the result of transformation can't fit into the encoding we want), or
1942 */
1943int
1944xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1945 xmlBufferPtr in) {
1946 int ret = -2;
1947 int written;
1948 int writtentot = 0;
1949 int toconv;
1950 int output = 0;
1951
1952 if (handler == NULL) return(-1);
1953 if (out == NULL) return(-1);
1954
1955retry:
1956
1957 written = out->size - out->use;
1958
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001959 if (written > 0)
1960 written--; /* Gennady: count '/0' */
1961
Owen Taylor3473f882001-02-23 17:55:21 +00001962 /*
1963 * First specific handling of in = NULL, i.e. the initialization call
1964 */
1965 if (in == NULL) {
1966 toconv = 0;
1967 if (handler->output != NULL) {
1968 ret = handler->output(&out->content[out->use], &written,
1969 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00001970 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00001971 out->use += written;
1972 out->content[out->use] = 0;
1973 }
Owen Taylor3473f882001-02-23 17:55:21 +00001974 }
1975#ifdef LIBXML_ICONV_ENABLED
1976 else if (handler->iconv_out != NULL) {
1977 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1978 &written, NULL, &toconv);
1979 out->use += written;
1980 out->content[out->use] = 0;
1981 }
1982#endif /* LIBXML_ICONV_ENABLED */
1983#ifdef DEBUG_ENCODING
1984 xmlGenericError(xmlGenericErrorContext,
1985 "initialized encoder\n");
1986#endif
1987 return(0);
1988 }
1989
1990 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001991 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00001992 */
1993 toconv = in->use;
1994 if (toconv == 0)
1995 return(0);
1996 if (toconv * 2 >= written) {
1997 xmlBufferGrow(out, toconv * 2);
1998 written = out->size - out->use - 1;
1999 }
2000 if (handler->output != NULL) {
2001 ret = handler->output(&out->content[out->use], &written,
2002 in->content, &toconv);
2003 xmlBufferShrink(in, toconv);
2004 out->use += written;
2005 writtentot += written;
2006 out->content[out->use] = 0;
2007 }
2008#ifdef LIBXML_ICONV_ENABLED
2009 else if (handler->iconv_out != NULL) {
2010 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2011 &written, in->content, &toconv);
2012 xmlBufferShrink(in, toconv);
2013 out->use += written;
2014 writtentot += written;
2015 out->content[out->use] = 0;
2016 if (ret == -1) {
2017 if (written > 0) {
2018 /*
2019 * Can be a limitation of iconv
2020 */
2021 goto retry;
2022 }
2023 ret = -3;
2024 }
2025 }
2026#endif /* LIBXML_ICONV_ENABLED */
2027 else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002028 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2029 "xmlCharEncOutFunc: no output function !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002030 return(-1);
2031 }
2032
2033 if (ret >= 0) output += ret;
2034
2035 /*
2036 * Attempt to handle error cases
2037 */
2038 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002039 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002040#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002041 xmlGenericError(xmlGenericErrorContext,
2042 "converted %d bytes to %d bytes of output\n",
2043 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002044#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002045 break;
2046 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002047#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002048 xmlGenericError(xmlGenericErrorContext,
2049 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002050#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002051 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002052 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002053#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002054 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2055 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002056#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002057 break;
2058 case -2: {
2059 int len = in->use;
2060 const xmlChar *utf = (const xmlChar *) in->content;
2061 int cur;
2062
2063 cur = xmlGetUTF8Char(utf, &len);
2064 if (cur > 0) {
2065 xmlChar charref[20];
2066
2067#ifdef DEBUG_ENCODING
2068 xmlGenericError(xmlGenericErrorContext,
2069 "handling output conversion error\n");
2070 xmlGenericError(xmlGenericErrorContext,
2071 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2072 in->content[0], in->content[1],
2073 in->content[2], in->content[3]);
2074#endif
2075 /*
2076 * Removes the UTF8 sequence, and replace it by a charref
2077 * and continue the transcoding phase, hoping the error
2078 * did not mangle the encoder state.
2079 */
Daniel Veillard2e7598c2005-09-02 12:28:34 +00002080 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002081 xmlBufferShrink(in, len);
2082 xmlBufferAddHead(in, charref, -1);
2083
2084 goto retry;
2085 } else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002086 char buf[50];
2087
Daniel Veillard2e7598c2005-09-02 12:28:34 +00002088 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002089 in->content[0], in->content[1],
2090 in->content[2], in->content[3]);
2091 buf[49] = 0;
2092 xmlEncodingErr(XML_I18N_CONV_FAILED,
2093 "output conversion failed due to conv error, bytes %s\n",
2094 buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 in->content[0] = ' ';
2096 }
2097 break;
2098 }
2099 }
2100 return(ret);
2101}
2102
2103/**
2104 * xmlCharEncCloseFunc:
2105 * @handler: char enconding transformation data structure
2106 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002107 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002108 *
2109 * Returns 0 if success, or -1 in case of error
2110 */
2111int
2112xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2113 int ret = 0;
2114 if (handler == NULL) return(-1);
2115 if (handler->name == NULL) return(-1);
2116#ifdef LIBXML_ICONV_ENABLED
2117 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002118 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002119 * and the associated icon resources.
2120 */
2121 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2122 if (handler->name != NULL)
2123 xmlFree(handler->name);
2124 handler->name = NULL;
2125 if (handler->iconv_out != NULL) {
2126 if (iconv_close(handler->iconv_out))
2127 ret = -1;
2128 handler->iconv_out = NULL;
2129 }
2130 if (handler->iconv_in != NULL) {
2131 if (iconv_close(handler->iconv_in))
2132 ret = -1;
2133 handler->iconv_in = NULL;
2134 }
2135 xmlFree(handler);
2136 }
2137#endif /* LIBXML_ICONV_ENABLED */
2138#ifdef DEBUG_ENCODING
2139 if (ret)
2140 xmlGenericError(xmlGenericErrorContext,
2141 "failed to close the encoding handler\n");
2142 else
2143 xmlGenericError(xmlGenericErrorContext,
2144 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002145#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 return(ret);
2148}
2149
Daniel Veillard36711902004-02-11 13:25:26 +00002150/**
2151 * xmlByteConsumed:
2152 * @ctxt: an XML parser context
2153 *
2154 * This function provides the current index of the parser relative
2155 * to the start of the current entity. This function is computed in
2156 * bytes from the beginning starting at zero and finishing at the
2157 * size in byte of the file if parsing a file. The function is
2158 * of constant cost if the input is UTF-8 but can be costly if run
2159 * on non-UTF-8 input.
2160 *
2161 * Returns the index in bytes from the beginning of the entity or -1
2162 * in case the index could not be computed.
2163 */
2164long
2165xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2166 xmlParserInputPtr in;
2167
2168 if (ctxt == NULL) return(-1);
2169 in = ctxt->input;
2170 if (in == NULL) return(-1);
2171 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2172 unsigned int unused = 0;
2173 xmlCharEncodingHandler * handler = in->buf->encoder;
2174 /*
2175 * Encoding conversion, compute the number of unused original
2176 * bytes from the input not consumed and substract that from
2177 * the raw consumed value, this is not a cheap operation
2178 */
2179 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002180 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002181 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002182 int toconv = in->end - in->cur, written = 32000;
2183
2184 int ret;
2185
2186 if (handler->output != NULL) {
2187 do {
2188 toconv = in->end - cur;
2189 written = 32000;
2190 ret = handler->output(&convbuf[0], &written,
2191 cur, &toconv);
2192 if (ret == -1) return(-1);
2193 unused += written;
2194 cur += toconv;
2195 } while (ret == -2);
2196#ifdef LIBXML_ICONV_ENABLED
2197 } else if (handler->iconv_out != NULL) {
2198 do {
2199 toconv = in->end - cur;
2200 written = 32000;
2201 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2202 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002203 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002204 if (written > 0)
2205 ret = -2;
2206 else
2207 return(-1);
2208 }
2209 unused += written;
2210 cur += toconv;
2211 } while (ret == -2);
2212#endif
2213 } else {
2214 /* could not find a converter */
2215 return(-1);
2216 }
2217 }
2218 if (in->buf->rawconsumed < unused)
2219 return(-1);
2220 return(in->buf->rawconsumed - unused);
2221 }
2222 return(in->consumed + (in->cur - in->base));
2223}
2224
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002225#ifndef LIBXML_ICONV_ENABLED
2226#ifdef LIBXML_ISO8859X_ENABLED
2227
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops, without error, as soon as
 * @out is full.
 *
 * @xlattable starts with a 48 bytes index: entries 0..31 are selected by
 * the low 5 bits of the lead byte of a 2 bytes sequence, entries 32..47
 * by the low 4 bits of the lead byte of a 3 bytes sequence. The index is
 * followed by blocks of 64 bytes, each addressed by the low 6 bits of a
 * trailing byte. A 0 found at the last level means the character is not
 * part of the target character set.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *         fails, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed,
 *     i.e. the offset of the first character which was not converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    /* start of the first character not yet fully converted */
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* ASCII is copied through unchanged */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto fail;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto fail;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto fail;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto fail;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                goto fail;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto fail;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto fail;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto fail;
            }
        } else {
            /* cannot transcode >= U+010000 */
            goto fail;
        }

        /* output exhausted: leave the current character unconsumed */
        if (out >= outend)
            break;
        *out++ = d;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

fail:
    /* report only what was converted before the offending sequence */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
2344
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: the code points for the input bytes 0x80..0xFF, indexed
 *                by (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops, without error, as soon as
 * the next character does not fit entirely in @out.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    while (in < inend) {
        /* the input byte is only read once it is known to exist */
        c = *in;
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* the table holds 16 bits values: 3 bytes at most */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2411
2412
2413/************************************************************************
2414 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2415 ************************************************************************/
2416
/*
 * 128 entries table of 16 bits values, one per byte 0x80..0xFF.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as an undefined code
 * point; the code passing it is outside of this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2435
/*
 * 48 bytes of index followed by 6 blocks of 64 bytes; the string literal
 * fills the array exactly, so no terminating 0 is stored.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as @xlattable, which
 * reads the index at offsets 0..47 and the blocks from offset 48, a 0 in
 * the final lookup meaning "not in the character set"; the code passing
 * it is outside of this part of the file.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2465
/*
 * 128 entries table of 16 bits values, one per byte 0x80..0xFF; several
 * entries are 0x0000.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as an undefined code
 * point; the code passing it is outside of this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2484
/*
 * 48 bytes of index followed by 7 blocks of 64 bytes; the string literal
 * fills the array exactly, so no terminating 0 is stored.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as @xlattable, which
 * reads the index at offsets 0..47 and the blocks from offset 48, a 0 in
 * the final lookup meaning "not in the character set"; the code passing
 * it is outside of this part of the file.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2518
/*
 * 128 entries table of 16 bits values, one per byte 0x80..0xFF.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as an undefined code
 * point; the code passing it is outside of this part of the file.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2537
/*
 * 48 bytes of index followed by 6 blocks of 64 bytes; the string literal
 * fills the array exactly, so no terminating 0 is stored.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as @xlattable, which
 * reads the index at offsets 0..47 and the blocks from offset 48, a 0 in
 * the final lookup meaning "not in the character set"; the code passing
 * it is outside of this part of the file.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2567
2568static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2569 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2570 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2571 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2572 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2573 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2574 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2575 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2576 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2577 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2578 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2579 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2580 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2581 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2582 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2583 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2584 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2585};
2586
2587static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2588 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2589 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2590 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2595 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2596 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2597 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2599 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2600 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2601 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2602 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2603 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2604 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2605 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2606 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2607 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2612 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2615};
2616
2617static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2618 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2619 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2620 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2621 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2622 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2623 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2624 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2625 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2626 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2627 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2628 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2629 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2630 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2631 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2632 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2633 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2634};
2635
2636static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2637 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2638 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2645 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2646 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2648 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2649 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2654 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2655 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2656 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2657 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2658 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2660};
2661
2662static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2663 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2664 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2665 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2666 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2667 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2668 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2669 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2670 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2671 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2672 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2673 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2674 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2675 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2676 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2677 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2678 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2679};
2680
2681static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2682 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2684 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2689 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2690 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2691 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2692 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2693 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2698 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2701 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2703 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2706 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2707 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2708 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2709 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2713};
2714
2715static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2716 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2717 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2718 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2719 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2720 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2721 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2722 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2723 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2724 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2725 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2726 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2727 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2728 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2729 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2730 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2731 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2732};
2733
2734static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2735 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2736 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2737 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2742 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2743 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2744 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2745 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2746 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2751 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2753 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2754 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2759 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2761 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2763 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2764 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2765 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2766};
2767
2768static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2769 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2770 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2771 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2772 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2773 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2774 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2775 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2776 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2777 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2778 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2779 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2780 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2781 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2782 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2783 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2784 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2785};
2786
2787static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
2788 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2795 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2796 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2797 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2798 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2799 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2800 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2801 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2802 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811};
2812
2813static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2814 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2815 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2816 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2817 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2818 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2819 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2820 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2821 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2822 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2823 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2824 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2825 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2826 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2827 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2828 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2829 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2830};
2831
2832static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
2833 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2835 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2841 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2842 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2843 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2844 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2845 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2846 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2847 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
2848 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2851 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
2852 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2861 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2862 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2863 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2864};
2865
2866static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2867 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2868 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2869 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2870 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2871 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2872 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2873 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2874 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2875 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2876 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2877 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2878 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2879 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2880 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2881 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2882 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2883};
2884
2885static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
2886 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2894 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2895 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2900 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
2901 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2902 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2903 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2904 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
2905 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2910 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2911 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2912 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2913};
2914
2915static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2916 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2917 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2918 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2919 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2920 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2921 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2922 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2923 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2924 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2925 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2926 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2927 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2928 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2929 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2930 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2931 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2932};
2933
2934static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
2935 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2943 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2944 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2945 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
2946 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2948 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2956 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
2958 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2960 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2961 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
2962 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2963 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2964 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2965 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2966};
2967
2968static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2969 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2970 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2971 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2972 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2973 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2974 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2975 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2976 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2977 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2978 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2979 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2980 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2981 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2982 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2983 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2984 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2985};
2986
2987static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
2988 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2996 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2997 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
2998 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3003 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3004 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3005 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3017 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3018 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3021 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3023 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3027 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3028 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3029 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3030 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3031};
3032
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 sixteen-bit Unicode code points, handed to ISO8859xToUTF8() by
 * ISO8859_15ToUTF8().  The values line up with the upper half of
 * ISO-8859-15: entry i describes byte 0x80 + i (the trailing comment on
 * each row gives the byte value of its first entry).
 *
 * NOTE(review): the indexing itself is performed by ISO8859xToUTF8(), which
 * is defined outside this part of the file -- confirm there.
 *
 * All entries equal their own byte value except eight: 0xA4, 0xA6, 0xA8,
 * 0xB4, 0xB8, 0xBC, 0xBD and 0xBE.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, /* 0xa0 */
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, /* 0xa8 */
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, /* 0xb0 */
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, /* 0xb8 */
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, /* 0xc0 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0xc8 */
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, /* 0xd0 */
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, /* 0xd8 */
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, /* 0xe0 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0xe8 */
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, /* 0xf0 */
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, /* 0xf8 */
};
3051
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Lookup data for the UTF-8 -> ISO-8859-15 direction, handed to
 * UTF8ToISO8859x() by UTF8ToISO8859_15().  The declared size spells out the
 * layout: a 48-byte index area followed by six 64-byte blocks.  The string
 * pieces below are 16 bytes each and together fill the array exactly, so
 * no terminating NUL is stored.
 *
 * NOTE(review): the traversal lives in UTF8ToISO8859x(), outside this part
 * of the file.  From the data, index bytes look like block numbers (block k
 * starting at offset 48 + 64 * k), leaf blocks hold the resulting
 * ISO-8859-15 byte, and 0 stands for "no mapping" -- confirm against
 * UTF8ToISO8859x() before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    /* index area: bytes 0..47 */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: bytes 48..111 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: bytes 112..175 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* block 2: bytes 176..239 */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3: bytes 240..303 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4: bytes 304..367 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* block 5: bytes 368..431 */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3081
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 sixteen-bit Unicode code points, handed to ISO8859xToUTF8() by
 * ISO8859_16ToUTF8().  The values line up with the upper half of
 * ISO-8859-16: entry i describes byte 0x80 + i (the trailing comment on
 * each row gives the byte value of its first entry).
 *
 * NOTE(review): the indexing itself is performed by ISO8859xToUTF8(), which
 * is defined outside this part of the file -- confirm there.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, /* 0xa0 */
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, /* 0xa8 */
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, /* 0xb0 */
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, /* 0xb8 */
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, /* 0xc0 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0xc8 */
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, /* 0xd0 */
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, /* 0xd8 */
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, /* 0xe0 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0xe8 */
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, /* 0xf0 */
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, /* 0xf8 */
};
3100
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Lookup data for the UTF-8 -> ISO-8859-16 direction, handed to
 * UTF8ToISO8859x() by UTF8ToISO8859_16().  The declared size spells out the
 * layout: a 48-byte index area followed by nine 64-byte blocks.  The string
 * pieces below are 16 bytes each and together fill the array exactly, so
 * no terminating NUL is stored.
 *
 * NOTE(review): the traversal lives in UTF8ToISO8859x(), outside this part
 * of the file.  From the data, index bytes look like block numbers (block k
 * starting at offset 48 + 64 * k), leaf blocks hold the resulting
 * ISO-8859-16 byte, and 0 stands for "no mapping" -- confirm against
 * UTF8ToISO8859x() before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* index area: bytes 0..47 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: bytes 48..111 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: bytes 112..175 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* block 2: bytes 176..239 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3: bytes 240..303 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* block 4: bytes 304..367 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5: bytes 368..431 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6: bytes 432..495 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7: bytes 496..559 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 8: bytes 560..623 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3142
3143
/*
 * Auto-generated wrapper functions for ISO-8859-2 .. ISO-8859-16
 * (no wrappers are defined here for parts 1 and 12).
 */
3147
3148static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3149 const unsigned char* in, int *inlen) {
3150 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3151}
3152static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3153 const unsigned char* in, int *inlen) {
3154 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3155}
3156
3157static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3158 const unsigned char* in, int *inlen) {
3159 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3160}
3161static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3162 const unsigned char* in, int *inlen) {
3163 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3164}
3165
3166static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3167 const unsigned char* in, int *inlen) {
3168 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3169}
3170static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3171 const unsigned char* in, int *inlen) {
3172 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3173}
3174
3175static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3176 const unsigned char* in, int *inlen) {
3177 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3178}
3179static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3180 const unsigned char* in, int *inlen) {
3181 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3182}
3183
3184static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3185 const unsigned char* in, int *inlen) {
3186 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3187}
3188static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3189 const unsigned char* in, int *inlen) {
3190 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3191}
3192
3193static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3194 const unsigned char* in, int *inlen) {
3195 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3196}
3197static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3198 const unsigned char* in, int *inlen) {
3199 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3200}
3201
3202static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3203 const unsigned char* in, int *inlen) {
3204 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3205}
3206static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3207 const unsigned char* in, int *inlen) {
3208 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3209}
3210
3211static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3212 const unsigned char* in, int *inlen) {
3213 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3214}
3215static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3216 const unsigned char* in, int *inlen) {
3217 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3218}
3219
3220static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3221 const unsigned char* in, int *inlen) {
3222 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3223}
3224static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3225 const unsigned char* in, int *inlen) {
3226 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3227}
3228
3229static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3230 const unsigned char* in, int *inlen) {
3231 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3232}
3233static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3234 const unsigned char* in, int *inlen) {
3235 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3236}
3237
3238static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3239 const unsigned char* in, int *inlen) {
3240 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3241}
3242static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3243 const unsigned char* in, int *inlen) {
3244 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3245}
3246
3247static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3248 const unsigned char* in, int *inlen) {
3249 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3250}
3251static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3252 const unsigned char* in, int *inlen) {
3253 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3254}
3255
3256static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3257 const unsigned char* in, int *inlen) {
3258 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3259}
3260static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3261 const unsigned char* in, int *inlen) {
3262 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3263}
3264
3265static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3266 const unsigned char* in, int *inlen) {
3267 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3268}
3269static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3270 const unsigned char* in, int *inlen) {
3271 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3272}
3273
3274static void
3275xmlRegisterCharEncodingHandlersISO8859x (void) {
3276 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3277 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3278 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3279 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3280 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3281 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3282 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3283 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3284 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3285 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3286 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3287 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3288 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3289 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3290}
3291
3292#endif
3293#endif
3294
Daniel Veillard5d4644e2005-04-01 13:11:58 +00003295#define bottom_encoding
3296#include "elfgcchack.h"
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003297