blob: 87b1d9b84fec6c912249a00f461feb692b2a8811 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01004 * Related specs:
Owen Taylor3473f882001-02-23 17:55:21 +00005 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
Owen Taylor3473f882001-02-23 17:55:21 +000016 * See Copyright for the status of this software.
17 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000018 * daniel@veillard.com
Daniel Veillard97ac1312001-05-30 19:14:17 +000019 *
Daniel Veillard97ac1312001-05-30 19:14:17 +000020 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor3473f882001-02-23 17:55:21 +000021 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
Daniel Veillard18d0db22012-07-13 19:51:15 +080027#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
29#ifdef HAVE_CTYPE_H
30#include <ctype.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef LIBXML_ICONV_ENABLED
36#ifdef HAVE_ERRNO_H
37#include <errno.h>
38#endif
39#endif
40#include <libxml/encoding.h>
41#include <libxml/xmlmemory.h>
42#ifdef LIBXML_HTML_ENABLED
43#include <libxml/HTMLparser.h>
44#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000045#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000046#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047
Daniel Veillard18d0db22012-07-13 19:51:15 +080048#include "buf.h"
49#include "enc.h"
50
Daniel Veillard22090732001-07-16 00:06:07 +000051static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000053
/*
 * One entry of the encoding alias table: @alias is an alternative
 * spelling that resolves to the encoding called @name.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;   /* the encoding name the alias stands for */
    const char *alias;  /* the alternative name */
} xmlCharEncodingAlias, *xmlCharEncodingAliasPtr;
60
61static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62static int xmlCharEncodingAliasesNb = 0;
63static int xmlCharEncodingAliasesMax = 0;
64
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +010065#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
Owen Taylor3473f882001-02-23 17:55:21 +000066#if 0
67#define DEBUG_ENCODING /* Define this to get encoding traces */
68#endif
William M. Brack16db7b62003-08-07 13:12:49 +000069#else
70#ifdef LIBXML_ISO8859X_ENABLED
71static void xmlRegisterCharEncodingHandlersISO8859x (void);
72#endif
Owen Taylor3473f882001-02-23 17:55:21 +000073#endif
74
75static int xmlLittleEndian = 1;
76
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000077/**
78 * xmlEncodingErrMemory:
79 * @extra: extra informations
80 *
81 * Handle an out of memory condition
82 */
83static void
84xmlEncodingErrMemory(const char *extra)
85{
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87}
88
89/**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
93 *
94 * n encoding error
95 */
96static void
97xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98{
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102}
Daniel Veillard97ac1312001-05-30 19:14:17 +0000103
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100104#ifdef LIBXML_ICU_ENABLED
105static uconv_t*
106openIcuConverter(const char* name, int toUnicode)
107{
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
112
113 conv->uconv = ucnv_open(name, &status);
114 if (U_FAILURE(status))
115 goto error;
116
117 status = U_ZERO_ERROR;
118 if (toUnicode) {
119 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
121 }
122 else {
123 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124 NULL, NULL, NULL, &status);
125 }
126 if (U_FAILURE(status))
127 goto error;
128
129 status = U_ZERO_ERROR;
130 conv->utf8 = ucnv_open("UTF-8", &status);
131 if (U_SUCCESS(status))
132 return conv;
133
134error:
135 if (conv->uconv)
136 ucnv_close(conv->uconv);
137 xmlFree(conv);
138 return NULL;
139}
140
141static void
142closeIcuConverter(uconv_t *conv)
143{
144 if (conv != NULL) {
145 ucnv_close(conv->uconv);
146 ucnv_close(conv->utf8);
147 xmlFree(conv);
148 }
149}
150#endif /* LIBXML_ICU_ENABLED */
151
Daniel Veillard97ac1312001-05-30 19:14:17 +0000152/************************************************************************
153 * *
154 * Conversions To/From UTF8 encoding *
155 * *
156 ************************************************************************/
157
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Every ASCII char is exactly one UTF-8 byte, so
 * the conversion stops after min(@inlen, @outlen) bytes.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL, a length is negative, or a byte >= 0x80 is found in @in.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * When a non-ASCII byte is found both values describe the bytes
 * converted before it.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    int count;
    int i;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    /*
     * The mapping is one byte to one byte: the whole output buffer can
     * be used, no slack for multi-byte sequences is needed.
     */
    count = (*inlen < *outlen) ? *inlen : *outlen;
    if (count < 0)
        return(-1);

    for (i = 0; i < count; i++) {
        if (in[i] >= 0x80) {
            /* not an ASCII char: report what was converted so far */
            *outlen = i;
            *inlen = i;
            return(-1);
        }
        out[i] = in[i];
    }

    *outlen = i;
    *inlen = i;
    return(*outlen);
}
202
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000203#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Only single-byte UTF-8 sequences map to ASCII. Any complete
 * multi-byte sequence is either a non-ASCII character, an overlong
 * encoding or malformed, and is rejected. A multi-byte sequence cut
 * short by the end of @in is left unconsumed so that the caller can
 * supply the missing bytes later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* outstart = out;
    const unsigned char* outend;
    unsigned int d;
    int trailing;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in;
        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) d;
            in++;
            continue;
        }

        if ((d >= 0xC0) && (d < 0xF8)) {
            /* lead byte of a 2, 3 or 4 byte sequence */
            if (d < 0xE0) trailing = 1;
            else if (d < 0xF0) trailing = 2;
            else trailing = 3;
            /* truncated sequence: stop and wait for more input */
            if (inend - (in + 1) < trailing)
                break;
        }

        /*
         * Stray trailing byte, invalid lead byte, or a complete
         * multi-byte sequence: no chance for this in ASCII.
         */
        failed = 1;
        break;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    if (failed)
        return(-2);
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000287#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000288
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, all other
 * bytes become a two-byte UTF-8 sequence. The conversion stops at the
 * first character which does not fit completely in @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * Space is checked with pointer differences so that no pointer
     * before the start of @out is ever formed, even for an empty buffer.
     */
    while (in < inend) {
        if (*in < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = *in++;
        } else {
            if (outend - out < 2)
                break;
            *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }

    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(*outlen);
}
337
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Copy operation for UTF-8 handling: at most min(@outlen, @inlenb)
 * bytes are copied without any validation. When @out is too small for
 * the whole input, the copy is shortened (by up to 3 bytes) so that it
 * does not end in the middle of a multi-byte character; this can yield
 * 0 bytes if not even the first character fits.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     a length is negative.
 * The values of @outlen and @inlenb after return are both the number of
 * octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int len;
    int cut;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (*outlen > *inlenb) {
        len = *inlenb;
    } else {
        len = *outlen;
    }
    if (len < 0)
        return(-1);

    if (len < *inlenb) {
        /*
         * The input is being truncated. inb[len] is the first byte left
         * behind: if it is a continuation byte (10xxxxxx) the cut falls
         * inside a character, so step back to that character's first byte.
         * A UTF-8 character has at most 3 continuation bytes; if no
         * boundary is found within that distance the input is not valid
         * UTF-8 and the plain truncation is kept.
         */
        cut = len;
        while ((cut > 0) && (len - cut < 3) && ((inb[cut] & 0xC0) == 0x80))
            cut--;
        if ((inb[cut] & 0xC0) != 0x80)
            len = cut;
    }

    memcpy(out, inb, len);

    *outlen = len;
    *inlenb = len;
    return(*outlen);
}
373
Daniel Veillarde72c7562002-05-31 09:47:30 +0000374
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000375#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * ISO Latin 1 covers U+0000..U+00FF, whose only well-formed UTF-8
 * encodings are a single byte below 0x80 or a two-byte sequence led by
 * 0xC2 or 0xC3. Every other complete sequence is a character outside
 * of Latin 1, an overlong encoding or malformed, and is rejected. A
 * multi-byte sequence cut short by the end of @in is left unconsumed
 * so that the caller can supply the missing bytes later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* outstart = out;
    const unsigned char* outend;
    unsigned int d;
    int trailing;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in;
        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) d;
            in++;
        } else if ((d == 0xC2) || (d == 0xC3)) {
            /* U+0080..U+00FF */
            if (inend - in < 2)
                break;              /* truncated: wait for more input */
            if ((in[1] & 0xC0) != 0x80) {
                failed = 1;         /* not a continuation byte */
                break;
            }
            if (out >= outend)
                break;
            *out++ = (unsigned char) (((d & 0x03) << 6) | (in[1] & 0x3F));
            in += 2;
        } else {
            if ((d >= 0xC0) && (d < 0xF8)) {
                if (d < 0xE0) trailing = 1;
                else if (d < 0xF0) trailing = 2;
                else trailing = 3;
                /* truncated sequence: stop and wait for more input */
                if (inend - (in + 1) < trailing)
                    break;
            }
            /*
             * Stray trailing byte, invalid lead byte, overlong form or
             * a character above U+00FF: no chance for this in IsoLat1.
             */
            failed = 1;
            break;
        }
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    if (failed)
        return(-2);
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000465#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000466
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so the
 * result depends neither on the byte order of the machine nor on the
 * alignment of @inb. A trailing odd byte is ignored.
 *
 * The conversion stops, without error, before the first character
 * which does not fit completely in @out and before a high surrogate
 * whose low surrogate has not been received yet.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or
 *     @inlenb is NULL or a length is negative, or -2 if the transcoding
 *     fails (unpaired surrogate in @inb). A NULL @inb converts nothing
 *     and returns 0.
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlenb - (*inlenb % 2));     /* whole code units only */

    while (inend - in >= 2) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {           /* high surrogate */
            if (inend - next < 2)
                break;                          /* pair split across blocks */
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            if ((d & 0xFC00) != 0xDC00) {
                failed = 1;
                break;
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            next += 2;
        } else if ((c & 0xFC00) == 0xDC00) {    /* low surrogate alone */
            failed = 1;
            break;
        }

        /* c is a single UCS-4 value, emit it only if it fits entirely */
        if (c < 0x80) need = 1;
        else if (c < 0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else need = 4;
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
        }
        in = next;
    }

    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    if (failed)
        return(-2);
    return(*outlen);
}
554
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000555#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is emitted. The output is
 * written byte by byte, so the result depends neither on the byte
 * order of the machine nor on the alignment of @outb.
 *
 * The conversion stops, without error, before the first character
 * which does not fit completely in @outb and before a multi-byte
 * sequence cut short by the end of @in.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or
 *     @inlen is NULL, or -2 if the transcoding failed (malformed or
 *     overlong sequence, surrogate code point, or value above
 *     U+10FFFF). A NULL @in is an initialization call: both lengths
 *     are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d, min, hi, lo;
    int trailing, i;
    int failed = 0;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;          /* whole code units only */
    while (in < inend) {
        d = in[0];
        if (d < 0x80) { c = d; trailing = 0; min = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            failed = 1;
            break;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; min = 0x80; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; min = 0x800; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; min = 0x10000; }
        else {
            /* no chance for this in UTF-16 */
            failed = 1;
            break;
        }

        /* truncated sequence: stop and wait for more input */
        if (inend - (in + 1) < trailing)
            break;

        for (i = 1; i <= trailing; i++) {
            d = in[i];
            if ((d & 0xC0) != 0x80)
                break;
            c = (c << 6) | (d & 0x3F);
        }
        if ((i <= trailing) ||                      /* bad continuation */
            (c < min) ||                            /* overlong form */
            ((c >= 0xD800) && (c <= 0xDFFF)) ||     /* surrogate */
            (c >= 0x110000)) {                      /* beyond UTF-16 */
            failed = 1;
            break;
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        in += trailing + 1;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (in - instart);
    if (failed)
        return(-2);
    return(*outlen);
}
664
665/**
William M. Brackf9415e42003-11-28 09:39:10 +0000666 * UTF8ToUTF16:
667 * @outb: a pointer to an array of bytes to store the result
668 * @outlen: the length of @outb
669 * @in: a pointer to an array of UTF-8 chars
670 * @inlen: the length of @in
671 *
672 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
673 * block of chars out.
674 *
675 * Returns the number of bytes written, or -1 if lack of space, or -2
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100676 * if the transcoding failed.
William M. Brackf9415e42003-11-28 09:39:10 +0000677 */
678static int
679UTF8ToUTF16(unsigned char* outb, int *outlen,
680 const unsigned char* in, int *inlen)
681{
682 if (in == NULL) {
683 /*
684 * initialization, add the Byte Order Mark for UTF-16LE
685 */
686 if (*outlen >= 2) {
687 outb[0] = 0xFF;
688 outb[1] = 0xFE;
689 *outlen = 2;
690 *inlen = 0;
691#ifdef DEBUG_ENCODING
692 xmlGenericError(xmlGenericErrorContext,
693 "Added FFFE Byte Order Mark\n");
694#endif
695 return(2);
696 }
697 *outlen = 0;
698 *inlen = 0;
699 return(0);
700 }
701 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
702}
William M. Brack030a7a12004-02-10 12:48:57 +0000703#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000704
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so the
 * result depends neither on the byte order of the machine nor on the
 * alignment of @inb. A trailing odd byte is ignored.
 *
 * The conversion stops, without error, before the first character
 * which does not fit completely in @out (a partial UTF-8 sequence is
 * never written) and before a high surrogate whose low surrogate has
 * not been received yet.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or
 *     @inlenb is NULL or a length is negative, or -2 if the transcoding
 *     fails (unpaired surrogate in @inb). A NULL @inb converts nothing
 *     and returns 0.
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlenb - (*inlenb % 2));     /* whole code units only */

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {           /* high surrogate */
            if (inend - next < 2)
                break;                          /* pair split across blocks */
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            if ((d & 0xFC00) != 0xDC00) {
                failed = 1;
                break;
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
            next += 2;
        } else if ((c & 0xFC00) == 0xDC00) {    /* low surrogate alone */
            failed = 1;
            break;
        }

        /* c is a single UCS-4 value, emit it only if it fits entirely */
        if (c < 0x80) need = 1;
        else if (c < 0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else need = 4;
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
        }
        in = next;
    }

    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    if (failed)
        return(-2);
    return(*outlen);
}
796
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000797#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is emitted. The output is
 * written byte by byte, so the result depends neither on the byte
 * order of the machine nor on the alignment of @outb.
 *
 * The conversion stops, without error, before the first character
 * which does not fit completely in @outb and before a multi-byte
 * sequence cut short by the end of @in.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or
 *     @inlen is NULL, or -2 if the transcoding failed (malformed or
 *     overlong sequence, surrogate code point, or value above
 *     U+10FFFF). A NULL @in is an initialization call: both lengths
 *     are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced,
 * also when -2 is returned.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d, min, hi, lo;
    int trailing, i;
    int failed = 0;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;          /* whole code units only */
    while (in < inend) {
        d = in[0];
        if (d < 0x80) { c = d; trailing = 0; min = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            failed = 1;
            break;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; min = 0x80; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; min = 0x800; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; min = 0x10000; }
        else {
            /* no chance for this in UTF-16 */
            failed = 1;
            break;
        }

        /* truncated sequence: stop and wait for more input */
        if (inend - (in + 1) < trailing)
            break;

        for (i = 1; i <= trailing; i++) {
            d = in[i];
            if ((d & 0xC0) != 0x80)
                break;
            c = (c << 6) | (d & 0x3F);
        }
        if ((i <= trailing) ||                      /* bad continuation */
            (c < min) ||                            /* overlong form */
            ((c >= 0xD800) && (c <= 0xDFFF)) ||     /* surrogate */
            (c >= 0x110000)) {                      /* beyond UTF-16 */
            failed = 1;
            break;
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        }
        in += trailing + 1;
    }
    /* lengths are always reported in bytes, including on failure */
    *outlen = (int) (out - outb);
    *inlen = (int) (in - instart);
    if (failed)
        return(-2);
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000903#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000904
Daniel Veillard97ac1312001-05-30 19:14:17 +0000905/************************************************************************
906 * *
907 * Generic encoding handling routines *
908 * *
909 ************************************************************************/
910
Owen Taylor3473f882001-02-23 17:55:21 +0000911/**
912 * xmlDetectCharEncoding:
913 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000914 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000915 * @len: pointer to the length of the buffer
916 *
917 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000918 * according to the non-normative appendix F of the XML-1.0 recommendation.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100919 *
Owen Taylor3473f882001-02-23 17:55:21 +0000920 * Returns one of the XML_CHAR_ENCODING_... values.
921 */
922xmlCharEncoding
923xmlDetectCharEncoding(const unsigned char* in, int len)
924{
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100925 if (in == NULL)
Daniel Veillardce682bc2004-11-05 17:22:25 +0000926 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000927 if (len >= 4) {
928 if ((in[0] == 0x00) && (in[1] == 0x00) &&
929 (in[2] == 0x00) && (in[3] == 0x3C))
930 return(XML_CHAR_ENCODING_UCS4BE);
931 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
932 (in[2] == 0x00) && (in[3] == 0x00))
933 return(XML_CHAR_ENCODING_UCS4LE);
934 if ((in[0] == 0x00) && (in[1] == 0x00) &&
935 (in[2] == 0x3C) && (in[3] == 0x00))
936 return(XML_CHAR_ENCODING_UCS4_2143);
937 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
938 (in[2] == 0x00) && (in[3] == 0x00))
939 return(XML_CHAR_ENCODING_UCS4_3412);
940 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
941 (in[2] == 0xA7) && (in[3] == 0x94))
942 return(XML_CHAR_ENCODING_EBCDIC);
943 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
944 (in[2] == 0x78) && (in[3] == 0x6D))
945 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000946 /*
947 * Although not part of the recommendation, we also
948 * attempt an "auto-recognition" of UTF-16LE and
949 * UTF-16BE encodings.
950 */
951 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 (in[2] == 0x3F) && (in[3] == 0x00))
953 return(XML_CHAR_ENCODING_UTF16LE);
954 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
955 (in[2] == 0x00) && (in[3] == 0x3F))
956 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000957 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000958 if (len >= 3) {
959 /*
960 * Errata on XML-1.0 June 20 2001
961 * We now allow an UTF8 encoded BOM
962 */
963 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
964 (in[2] == 0xBF))
965 return(XML_CHAR_ENCODING_UTF8);
966 }
William M. Brackf9415e42003-11-28 09:39:10 +0000967 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000968 if (len >= 2) {
969 if ((in[0] == 0xFE) && (in[1] == 0xFF))
970 return(XML_CHAR_ENCODING_UTF16BE);
971 if ((in[0] == 0xFF) && (in[1] == 0xFE))
972 return(XML_CHAR_ENCODING_UTF16LE);
973 }
974 return(XML_CHAR_ENCODING_NONE);
975}
976
977/**
978 * xmlCleanupEncodingAliases:
979 *
980 * Unregisters all aliases
981 */
982void
983xmlCleanupEncodingAliases(void) {
984 int i;
985
986 if (xmlCharEncodingAliases == NULL)
987 return;
988
989 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
990 if (xmlCharEncodingAliases[i].name != NULL)
991 xmlFree((char *) xmlCharEncodingAliases[i].name);
992 if (xmlCharEncodingAliases[i].alias != NULL)
993 xmlFree((char *) xmlCharEncodingAliases[i].alias);
994 }
995 xmlCharEncodingAliasesNb = 0;
996 xmlCharEncodingAliasesMax = 0;
997 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000998 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000999}
1000
1001/**
1002 * xmlGetEncodingAlias:
1003 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1004 *
1005 * Lookup an encoding name for the given alias.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 *
William M. Brackf9415e42003-11-28 09:39:10 +00001007 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +00001008 */
1009const char *
1010xmlGetEncodingAlias(const char *alias) {
1011 int i;
1012 char upper[100];
1013
1014 if (alias == NULL)
1015 return(NULL);
1016
1017 if (xmlCharEncodingAliases == NULL)
1018 return(NULL);
1019
1020 for (i = 0;i < 99;i++) {
1021 upper[i] = toupper(alias[i]);
1022 if (upper[i] == 0) break;
1023 }
1024 upper[i] = 0;
1025
1026 /*
1027 * Walk down the list looking for a definition of the alias
1028 */
1029 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1030 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1031 return(xmlCharEncodingAliases[i].name);
1032 }
1033 }
1034 return(NULL);
1035}
1036
1037/**
1038 * xmlAddEncodingAlias:
1039 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1040 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1041 *
William M. Brackf9415e42003-11-28 09:39:10 +00001042 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +00001043 * will be overwritten.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001044 *
Owen Taylor3473f882001-02-23 17:55:21 +00001045 * Returns 0 in case of success, -1 in case of error
1046 */
1047int
1048xmlAddEncodingAlias(const char *name, const char *alias) {
1049 int i;
1050 char upper[100];
1051
1052 if ((name == NULL) || (alias == NULL))
1053 return(-1);
1054
1055 for (i = 0;i < 99;i++) {
1056 upper[i] = toupper(alias[i]);
1057 if (upper[i] == 0) break;
1058 }
1059 upper[i] = 0;
1060
1061 if (xmlCharEncodingAliases == NULL) {
1062 xmlCharEncodingAliasesNb = 0;
1063 xmlCharEncodingAliasesMax = 20;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001064 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001065 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1066 if (xmlCharEncodingAliases == NULL)
1067 return(-1);
1068 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1069 xmlCharEncodingAliasesMax *= 2;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001070 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001071 xmlRealloc(xmlCharEncodingAliases,
1072 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1073 }
1074 /*
1075 * Walk down the list looking for a definition of the alias
1076 */
1077 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1078 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1079 /*
1080 * Replace the definition.
1081 */
1082 xmlFree((char *) xmlCharEncodingAliases[i].name);
1083 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1084 return(0);
1085 }
1086 }
1087 /*
1088 * Add the definition
1089 */
1090 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1091 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1092 xmlCharEncodingAliasesNb++;
1093 return(0);
1094}
1095
1096/**
1097 * xmlDelEncodingAlias:
1098 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1099 *
1100 * Unregisters an encoding alias @alias
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001101 *
Owen Taylor3473f882001-02-23 17:55:21 +00001102 * Returns 0 in case of success, -1 in case of error
1103 */
1104int
1105xmlDelEncodingAlias(const char *alias) {
1106 int i;
1107
1108 if (alias == NULL)
1109 return(-1);
1110
1111 if (xmlCharEncodingAliases == NULL)
1112 return(-1);
1113 /*
1114 * Walk down the list looking for a definition of the alias
1115 */
1116 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1117 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1118 xmlFree((char *) xmlCharEncodingAliases[i].name);
1119 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1120 xmlCharEncodingAliasesNb--;
1121 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1122 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1123 return(0);
1124 }
1125 }
1126 return(-1);
1127}
1128
1129/**
1130 * xmlParseCharEncoding:
1131 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1132 *
William M. Brackf9415e42003-11-28 09:39:10 +00001133 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001134 * that the comparison is case insensitive accordingly to the section
1135 * [XML] 4.3.3 Character Encoding in Entities.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001136 *
Owen Taylor3473f882001-02-23 17:55:21 +00001137 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1138 * if not recognized.
1139 */
1140xmlCharEncoding
1141xmlParseCharEncoding(const char* name)
1142{
1143 const char *alias;
1144 char upper[500];
1145 int i;
1146
1147 if (name == NULL)
1148 return(XML_CHAR_ENCODING_NONE);
1149
1150 /*
1151 * Do the alias resolution
1152 */
1153 alias = xmlGetEncodingAlias(name);
1154 if (alias != NULL)
1155 name = alias;
1156
1157 for (i = 0;i < 499;i++) {
1158 upper[i] = toupper(name[i]);
1159 if (upper[i] == 0) break;
1160 }
1161 upper[i] = 0;
1162
1163 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1164 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1165 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1166
1167 /*
1168 * NOTE: if we were able to parse this, the endianness of UTF16 is
1169 * already found and in use
1170 */
1171 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1172 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001173
Owen Taylor3473f882001-02-23 17:55:21 +00001174 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1175 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1176 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1177
1178 /*
1179 * NOTE: if we were able to parse this, the endianness of UCS4 is
1180 * already found and in use
1181 */
1182 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1183 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1184 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1185
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001186
Owen Taylor3473f882001-02-23 17:55:21 +00001187 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1188 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1189 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1190
1191 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1192 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1193 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1194
1195 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1196 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1197 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1198 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1199 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1200 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1201 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1202
1203 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1204 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1205 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1206
1207#ifdef DEBUG_ENCODING
1208 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1209#endif
1210 return(XML_CHAR_ENCODING_ERROR);
1211}
1212
1213/**
1214 * xmlGetCharEncodingName:
1215 * @enc: the encoding
1216 *
1217 * The "canonical" name for XML encoding.
1218 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1219 * Section 4.3.3 Character Encoding in Entities
1220 *
1221 * Returns the canonical name for the given encoding
1222 */
1223
1224const char*
1225xmlGetCharEncodingName(xmlCharEncoding enc) {
1226 switch (enc) {
1227 case XML_CHAR_ENCODING_ERROR:
1228 return(NULL);
1229 case XML_CHAR_ENCODING_NONE:
1230 return(NULL);
1231 case XML_CHAR_ENCODING_UTF8:
1232 return("UTF-8");
1233 case XML_CHAR_ENCODING_UTF16LE:
1234 return("UTF-16");
1235 case XML_CHAR_ENCODING_UTF16BE:
1236 return("UTF-16");
1237 case XML_CHAR_ENCODING_EBCDIC:
1238 return("EBCDIC");
1239 case XML_CHAR_ENCODING_UCS4LE:
1240 return("ISO-10646-UCS-4");
1241 case XML_CHAR_ENCODING_UCS4BE:
1242 return("ISO-10646-UCS-4");
1243 case XML_CHAR_ENCODING_UCS4_2143:
1244 return("ISO-10646-UCS-4");
1245 case XML_CHAR_ENCODING_UCS4_3412:
1246 return("ISO-10646-UCS-4");
1247 case XML_CHAR_ENCODING_UCS2:
1248 return("ISO-10646-UCS-2");
1249 case XML_CHAR_ENCODING_8859_1:
1250 return("ISO-8859-1");
1251 case XML_CHAR_ENCODING_8859_2:
1252 return("ISO-8859-2");
1253 case XML_CHAR_ENCODING_8859_3:
1254 return("ISO-8859-3");
1255 case XML_CHAR_ENCODING_8859_4:
1256 return("ISO-8859-4");
1257 case XML_CHAR_ENCODING_8859_5:
1258 return("ISO-8859-5");
1259 case XML_CHAR_ENCODING_8859_6:
1260 return("ISO-8859-6");
1261 case XML_CHAR_ENCODING_8859_7:
1262 return("ISO-8859-7");
1263 case XML_CHAR_ENCODING_8859_8:
1264 return("ISO-8859-8");
1265 case XML_CHAR_ENCODING_8859_9:
1266 return("ISO-8859-9");
1267 case XML_CHAR_ENCODING_2022_JP:
1268 return("ISO-2022-JP");
1269 case XML_CHAR_ENCODING_SHIFT_JIS:
1270 return("Shift-JIS");
1271 case XML_CHAR_ENCODING_EUC_JP:
1272 return("EUC-JP");
1273 case XML_CHAR_ENCODING_ASCII:
1274 return(NULL);
1275 }
1276 return(NULL);
1277}
1278
Daniel Veillard97ac1312001-05-30 19:14:17 +00001279/************************************************************************
1280 * *
1281 * Char encoding handlers *
1282 * *
1283 ************************************************************************/
1284
Owen Taylor3473f882001-02-23 17:55:21 +00001285
1286/* the size should be growable, but it's not a big deal ... */
1287#define MAX_ENCODING_HANDLERS 50
1288static xmlCharEncodingHandlerPtr *handlers = NULL;
1289static int nbCharEncodingHandler = 0;
1290
1291/*
1292 * The default is UTF-8 for XML, that's also the default used for the
1293 * parser internals, so the default encoding handler is NULL
1294 */
1295
1296static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1297
1298/**
1299 * xmlNewCharEncodingHandler:
1300 * @name: the encoding name, in UTF-8 format (ASCII actually)
1301 * @input: the xmlCharEncodingInputFunc to read that encoding
1302 * @output: the xmlCharEncodingOutputFunc to write that encoding
1303 *
1304 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001305 *
Owen Taylor3473f882001-02-23 17:55:21 +00001306 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1307 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001308xmlCharEncodingHandlerPtr
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001309xmlNewCharEncodingHandler(const char *name,
Owen Taylor3473f882001-02-23 17:55:21 +00001310 xmlCharEncodingInputFunc input,
1311 xmlCharEncodingOutputFunc output) {
1312 xmlCharEncodingHandlerPtr handler;
1313 const char *alias;
1314 char upper[500];
1315 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001316 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001317
1318 /*
1319 * Do the alias resolution
1320 */
1321 alias = xmlGetEncodingAlias(name);
1322 if (alias != NULL)
1323 name = alias;
1324
1325 /*
1326 * Keep only the uppercase version of the encoding.
1327 */
1328 if (name == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001329 xmlEncodingErr(XML_I18N_NO_NAME,
1330 "xmlNewCharEncodingHandler : no name !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001331 return(NULL);
1332 }
1333 for (i = 0;i < 499;i++) {
1334 upper[i] = toupper(name[i]);
1335 if (upper[i] == 0) break;
1336 }
1337 upper[i] = 0;
1338 up = xmlMemStrdup(upper);
1339 if (up == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001340 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001341 return(NULL);
1342 }
1343
1344 /*
1345 * allocate and fill-up an handler block.
1346 */
1347 handler = (xmlCharEncodingHandlerPtr)
1348 xmlMalloc(sizeof(xmlCharEncodingHandler));
1349 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001350 xmlFree(up);
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001351 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001352 return(NULL);
1353 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001354 memset(handler, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001355 handler->input = input;
1356 handler->output = output;
1357 handler->name = up;
1358
1359#ifdef LIBXML_ICONV_ENABLED
1360 handler->iconv_in = NULL;
1361 handler->iconv_out = NULL;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001362#endif
1363#ifdef LIBXML_ICU_ENABLED
1364 handler->uconv_in = NULL;
1365 handler->uconv_out = NULL;
1366#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001367
1368 /*
1369 * registers and returns the handler.
1370 */
1371 xmlRegisterCharEncodingHandler(handler);
1372#ifdef DEBUG_ENCODING
1373 xmlGenericError(xmlGenericErrorContext,
1374 "Registered encoding handler for %s\n", name);
1375#endif
1376 return(handler);
1377}
1378
1379/**
1380 * xmlInitCharEncodingHandlers:
1381 *
1382 * Initialize the char encoding support, it registers the default
1383 * encoding supported.
1384 * NOTE: while public, this function usually doesn't need to be called
1385 * in normal processing.
1386 */
1387void
1388xmlInitCharEncodingHandlers(void) {
1389 unsigned short int tst = 0x1234;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001390 unsigned char *ptr = (unsigned char *) &tst;
Owen Taylor3473f882001-02-23 17:55:21 +00001391
1392 if (handlers != NULL) return;
1393
1394 handlers = (xmlCharEncodingHandlerPtr *)
1395 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1396
1397 if (*ptr == 0x12) xmlLittleEndian = 0;
1398 else if (*ptr == 0x34) xmlLittleEndian = 1;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001399 else {
1400 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1401 "Odd problem at endianness detection\n", NULL);
1402 }
Owen Taylor3473f882001-02-23 17:55:21 +00001403
1404 if (handlers == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001405 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001406 return;
1407 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001408 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001409#ifdef LIBXML_OUTPUT_ENABLED
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001410 xmlUTF16LEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001411 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001412 xmlUTF16BEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001413 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001414 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001415 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1416 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001417 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001418#ifdef LIBXML_HTML_ENABLED
1419 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1420#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001421#else
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001422 xmlUTF16LEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001423 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001424 xmlUTF16BEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001425 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001426 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001427 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1428 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1429 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1430#endif /* LIBXML_OUTPUT_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001431#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001432#ifdef LIBXML_ISO8859X_ENABLED
1433 xmlRegisterCharEncodingHandlersISO8859x ();
1434#endif
1435#endif
1436
Owen Taylor3473f882001-02-23 17:55:21 +00001437}
1438
1439/**
1440 * xmlCleanupCharEncodingHandlers:
1441 *
1442 * Cleanup the memory allocated for the char encoding support, it
1443 * unregisters all the encoding handlers and the aliases.
1444 */
1445void
1446xmlCleanupCharEncodingHandlers(void) {
1447 xmlCleanupEncodingAliases();
1448
1449 if (handlers == NULL) return;
1450
1451 for (;nbCharEncodingHandler > 0;) {
1452 nbCharEncodingHandler--;
1453 if (handlers[nbCharEncodingHandler] != NULL) {
1454 if (handlers[nbCharEncodingHandler]->name != NULL)
1455 xmlFree(handlers[nbCharEncodingHandler]->name);
1456 xmlFree(handlers[nbCharEncodingHandler]);
1457 }
1458 }
1459 xmlFree(handlers);
1460 handlers = NULL;
1461 nbCharEncodingHandler = 0;
1462 xmlDefaultCharEncodingHandler = NULL;
1463}
1464
1465/**
1466 * xmlRegisterCharEncodingHandler:
1467 * @handler: the xmlCharEncodingHandlerPtr handler block
1468 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001469 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001470 */
1471void
1472xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1473 if (handlers == NULL) xmlInitCharEncodingHandlers();
Daniel Veillard76d36452009-09-07 11:19:33 +02001474 if ((handler == NULL) || (handlers == NULL)) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001475 xmlEncodingErr(XML_I18N_NO_HANDLER,
1476 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001477 return;
1478 }
1479
1480 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001481 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1482 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1483 "MAX_ENCODING_HANDLERS");
Owen Taylor3473f882001-02-23 17:55:21 +00001484 return;
1485 }
1486 handlers[nbCharEncodingHandler++] = handler;
1487}
1488
1489/**
1490 * xmlGetCharEncodingHandler:
1491 * @enc: an xmlCharEncoding value.
1492 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001493 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001494 *
1495 * Returns the handler or NULL if not found
1496 */
1497xmlCharEncodingHandlerPtr
1498xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1499 xmlCharEncodingHandlerPtr handler;
1500
1501 if (handlers == NULL) xmlInitCharEncodingHandlers();
1502 switch (enc) {
1503 case XML_CHAR_ENCODING_ERROR:
1504 return(NULL);
1505 case XML_CHAR_ENCODING_NONE:
1506 return(NULL);
1507 case XML_CHAR_ENCODING_UTF8:
1508 return(NULL);
1509 case XML_CHAR_ENCODING_UTF16LE:
1510 return(xmlUTF16LEHandler);
1511 case XML_CHAR_ENCODING_UTF16BE:
1512 return(xmlUTF16BEHandler);
1513 case XML_CHAR_ENCODING_EBCDIC:
1514 handler = xmlFindCharEncodingHandler("EBCDIC");
1515 if (handler != NULL) return(handler);
1516 handler = xmlFindCharEncodingHandler("ebcdic");
1517 if (handler != NULL) return(handler);
Martin Köglerc78988a2009-08-24 16:47:48 +02001518 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1519 if (handler != NULL) return(handler);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 break;
1521 case XML_CHAR_ENCODING_UCS4BE:
1522 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1523 if (handler != NULL) return(handler);
1524 handler = xmlFindCharEncodingHandler("UCS-4");
1525 if (handler != NULL) return(handler);
1526 handler = xmlFindCharEncodingHandler("UCS4");
1527 if (handler != NULL) return(handler);
1528 break;
1529 case XML_CHAR_ENCODING_UCS4LE:
1530 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1531 if (handler != NULL) return(handler);
1532 handler = xmlFindCharEncodingHandler("UCS-4");
1533 if (handler != NULL) return(handler);
1534 handler = xmlFindCharEncodingHandler("UCS4");
1535 if (handler != NULL) return(handler);
1536 break;
1537 case XML_CHAR_ENCODING_UCS4_2143:
1538 break;
1539 case XML_CHAR_ENCODING_UCS4_3412:
1540 break;
1541 case XML_CHAR_ENCODING_UCS2:
1542 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1543 if (handler != NULL) return(handler);
1544 handler = xmlFindCharEncodingHandler("UCS-2");
1545 if (handler != NULL) return(handler);
1546 handler = xmlFindCharEncodingHandler("UCS2");
1547 if (handler != NULL) return(handler);
1548 break;
1549
1550 /*
1551 * We used to keep ISO Latin encodings native in the
1552 * generated data. This led to so many problems that
1553 * this has been removed. One can still change this
1554 * back by registering no-ops encoders for those
1555 */
1556 case XML_CHAR_ENCODING_8859_1:
1557 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1558 if (handler != NULL) return(handler);
1559 break;
1560 case XML_CHAR_ENCODING_8859_2:
1561 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1562 if (handler != NULL) return(handler);
1563 break;
1564 case XML_CHAR_ENCODING_8859_3:
1565 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1566 if (handler != NULL) return(handler);
1567 break;
1568 case XML_CHAR_ENCODING_8859_4:
1569 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1570 if (handler != NULL) return(handler);
1571 break;
1572 case XML_CHAR_ENCODING_8859_5:
1573 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1574 if (handler != NULL) return(handler);
1575 break;
1576 case XML_CHAR_ENCODING_8859_6:
1577 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1578 if (handler != NULL) return(handler);
1579 break;
1580 case XML_CHAR_ENCODING_8859_7:
1581 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1582 if (handler != NULL) return(handler);
1583 break;
1584 case XML_CHAR_ENCODING_8859_8:
1585 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1586 if (handler != NULL) return(handler);
1587 break;
1588 case XML_CHAR_ENCODING_8859_9:
1589 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1590 if (handler != NULL) return(handler);
1591 break;
1592
1593
1594 case XML_CHAR_ENCODING_2022_JP:
1595 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1596 if (handler != NULL) return(handler);
1597 break;
1598 case XML_CHAR_ENCODING_SHIFT_JIS:
1599 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1600 if (handler != NULL) return(handler);
1601 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1602 if (handler != NULL) return(handler);
1603 handler = xmlFindCharEncodingHandler("Shift_JIS");
1604 if (handler != NULL) return(handler);
1605 break;
1606 case XML_CHAR_ENCODING_EUC_JP:
1607 handler = xmlFindCharEncodingHandler("EUC-JP");
1608 if (handler != NULL) return(handler);
1609 break;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001610 default:
Owen Taylor3473f882001-02-23 17:55:21 +00001611 break;
1612 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001613
Owen Taylor3473f882001-02-23 17:55:21 +00001614#ifdef DEBUG_ENCODING
1615 xmlGenericError(xmlGenericErrorContext,
1616 "No handler found for encoding %d\n", enc);
1617#endif
1618 return(NULL);
1619}
1620
1621/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001622 * xmlFindCharEncodingHandler:
1623 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001624 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001625 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001626 *
1627 * Returns the handler or NULL if not found
1628 */
1629xmlCharEncodingHandlerPtr
1630xmlFindCharEncodingHandler(const char *name) {
1631 const char *nalias;
1632 const char *norig;
1633 xmlCharEncoding alias;
1634#ifdef LIBXML_ICONV_ENABLED
1635 xmlCharEncodingHandlerPtr enc;
1636 iconv_t icv_in, icv_out;
1637#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001638#ifdef LIBXML_ICU_ENABLED
1639 xmlCharEncodingHandlerPtr encu;
1640 uconv_t *ucv_in, *ucv_out;
1641#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001642 char upper[100];
1643 int i;
1644
1645 if (handlers == NULL) xmlInitCharEncodingHandlers();
1646 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1647 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1648
1649 /*
1650 * Do the alias resolution
1651 */
1652 norig = name;
1653 nalias = xmlGetEncodingAlias(name);
1654 if (nalias != NULL)
1655 name = nalias;
1656
1657 /*
1658 * Check first for directly registered encoding names
1659 */
1660 for (i = 0;i < 99;i++) {
1661 upper[i] = toupper(name[i]);
1662 if (upper[i] == 0) break;
1663 }
1664 upper[i] = 0;
1665
Daniel Veillardd44b9362009-09-07 12:15:08 +02001666 if (handlers != NULL) {
1667 for (i = 0;i < nbCharEncodingHandler; i++) {
1668 if (!strcmp(upper, handlers[i]->name)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001669#ifdef DEBUG_ENCODING
Daniel Veillardd44b9362009-09-07 12:15:08 +02001670 xmlGenericError(xmlGenericErrorContext,
1671 "Found registered handler for encoding %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001672#endif
Daniel Veillardd44b9362009-09-07 12:15:08 +02001673 return(handlers[i]);
1674 }
1675 }
1676 }
Owen Taylor3473f882001-02-23 17:55:21 +00001677
1678#ifdef LIBXML_ICONV_ENABLED
1679 /* check whether iconv can handle this */
1680 icv_in = iconv_open("UTF-8", name);
1681 icv_out = iconv_open(name, "UTF-8");
Daniel Veillard28aac0b2006-10-16 08:31:18 +00001682 if (icv_in == (iconv_t) -1) {
1683 icv_in = iconv_open("UTF-8", upper);
1684 }
1685 if (icv_out == (iconv_t) -1) {
1686 icv_out = iconv_open(upper, "UTF-8");
1687 }
Owen Taylor3473f882001-02-23 17:55:21 +00001688 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1689 enc = (xmlCharEncodingHandlerPtr)
1690 xmlMalloc(sizeof(xmlCharEncodingHandler));
1691 if (enc == NULL) {
1692 iconv_close(icv_in);
1693 iconv_close(icv_out);
1694 return(NULL);
1695 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001696 memset(enc, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001697 enc->name = xmlMemStrdup(name);
1698 enc->input = NULL;
1699 enc->output = NULL;
1700 enc->iconv_in = icv_in;
1701 enc->iconv_out = icv_out;
1702#ifdef DEBUG_ENCODING
1703 xmlGenericError(xmlGenericErrorContext,
1704 "Found iconv handler for encoding %s\n", name);
1705#endif
1706 return enc;
1707 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001708 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00001709 "iconv : problems with filters for '%s'\n", name);
1710 }
1711#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001712#ifdef LIBXML_ICU_ENABLED
1713 /* check whether icu can handle this */
1714 ucv_in = openIcuConverter(name, 1);
1715 ucv_out = openIcuConverter(name, 0);
1716 if (ucv_in != NULL && ucv_out != NULL) {
1717 encu = (xmlCharEncodingHandlerPtr)
1718 xmlMalloc(sizeof(xmlCharEncodingHandler));
1719 if (encu == NULL) {
1720 closeIcuConverter(ucv_in);
1721 closeIcuConverter(ucv_out);
1722 return(NULL);
1723 }
1724 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1725 encu->name = xmlMemStrdup(name);
1726 encu->input = NULL;
1727 encu->output = NULL;
1728 encu->uconv_in = ucv_in;
1729 encu->uconv_out = ucv_out;
1730#ifdef DEBUG_ENCODING
1731 xmlGenericError(xmlGenericErrorContext,
1732 "Found ICU converter handler for encoding %s\n", name);
1733#endif
1734 return encu;
1735 } else if (ucv_in != NULL || ucv_out != NULL) {
1736 closeIcuConverter(ucv_in);
1737 closeIcuConverter(ucv_out);
1738 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1739 "ICU converter : problems with filters for '%s'\n", name);
1740 }
1741#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001742
1743#ifdef DEBUG_ENCODING
1744 xmlGenericError(xmlGenericErrorContext,
1745 "No handler found for encoding %s\n", name);
1746#endif
1747
1748 /*
1749 * Fallback using the canonical names
1750 */
1751 alias = xmlParseCharEncoding(norig);
1752 if (alias != XML_CHAR_ENCODING_ERROR) {
1753 const char* canon;
1754 canon = xmlGetCharEncodingName(alias);
1755 if ((canon != NULL) && (strcmp(name, canon))) {
1756 return(xmlFindCharEncodingHandler(canon));
1757 }
1758 }
1759
William M. Brackf9415e42003-11-28 09:39:10 +00001760 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(NULL);
1762}
1763
Daniel Veillard97ac1312001-05-30 19:14:17 +00001764/************************************************************************
1765 * *
1766 * ICONV based generic conversion functions *
1767 * *
1768 ************************************************************************/
1769
Owen Taylor3473f882001-02-23 17:55:21 +00001770#ifdef LIBXML_ICONV_ENABLED
1771/**
1772 * xmlIconvWrapper:
1773 * @cd: iconv converter data structure
1774 * @out: a pointer to an array of bytes to store the result
1775 * @outlen: the length of @out
1776 * @in: a pointer to an array of ISO Latin 1 chars
1777 * @inlen: the length of @in
1778 *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001779 * Returns 0 if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001780 * -1 by lack of space, or
1781 * -2 if the transcoding fails (for *in is not valid utf8 string or
1782 * the result of transformation can't fit into the encoding we want), or
1783 * -3 if there the last byte can't form a single output char.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001784 *
Owen Taylor3473f882001-02-23 17:55:21 +00001785 * The value of @inlen after return is the number of octets consumed
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001786 * as the return value is positive, else unpredictable.
Owen Taylor3473f882001-02-23 17:55:21 +00001787 * The value of @outlen after return is the number of ocetes consumed.
1788 */
1789static int
Daniel Veillardce682bc2004-11-05 17:22:25 +00001790xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1791 const unsigned char *in, int *inlen) {
1792 size_t icv_inlen, icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001793 const char *icv_in = (const char *) in;
1794 char *icv_out = (char *) out;
1795 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001796
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001797 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1798 if (outlen != NULL) *outlen = 0;
Daniel Veillardce682bc2004-11-05 17:22:25 +00001799 return(-1);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001800 }
Daniel Veillardce682bc2004-11-05 17:22:25 +00001801 icv_inlen = *inlen;
1802 icv_outlen = *outlen;
Daniel Veillard8e1a46d2008-02-15 07:47:26 +00001803 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
Daniel Veillard2728f842006-03-09 16:49:24 +00001804 *inlen -= icv_inlen;
1805 *outlen -= icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001806 if ((icv_inlen != 0) || (ret == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001807#ifdef EILSEQ
Daniel Veillard9403a042001-05-28 11:00:53 +00001808 if (errno == EILSEQ) {
1809 return -2;
1810 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001811#endif
1812#ifdef E2BIG
Daniel Veillard9403a042001-05-28 11:00:53 +00001813 if (errno == E2BIG) {
1814 return -1;
1815 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001816#endif
1817#ifdef EINVAL
Daniel Veillard9403a042001-05-28 11:00:53 +00001818 if (errno == EINVAL) {
1819 return -3;
1820 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001821#endif
Daniel Veillard9403a042001-05-28 11:00:53 +00001822 {
1823 return -3;
1824 }
1825 }
1826 return 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001827}
1828#endif /* LIBXML_ICONV_ENABLED */
1829
Daniel Veillard97ac1312001-05-30 19:14:17 +00001830/************************************************************************
1831 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001832 * ICU based generic conversion functions *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001833 * *
1834 ************************************************************************/
1835
1836#ifdef LIBXML_ICU_ENABLED
1837/**
1838 * xmlUconvWrapper:
1839 * @cd: ICU uconverter data structure
1840 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1841 * @out: a pointer to an array of bytes to store the result
1842 * @outlen: the length of @out
1843 * @in: a pointer to an array of ISO Latin 1 chars
1844 * @inlen: the length of @in
1845 *
1846 * Returns 0 if success, or
1847 * -1 by lack of space, or
1848 * -2 if the transcoding fails (for *in is not valid utf8 string or
1849 * the result of transformation can't fit into the encoding we want), or
1850 * -3 if there the last byte can't form a single output char.
1851 *
1852 * The value of @inlen after return is the number of octets consumed
1853 * as the return value is positive, else unpredictable.
1854 * The value of @outlen after return is the number of ocetes consumed.
1855 */
1856static int
1857xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1858 const unsigned char *in, int *inlen) {
1859 const char *ucv_in = (const char *) in;
1860 char *ucv_out = (char *) out;
1861 UErrorCode err = U_ZERO_ERROR;
1862
1863 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1864 if (outlen != NULL) *outlen = 0;
1865 return(-1);
1866 }
1867
1868 /*
1869 * TODO(jungshik)
1870 * 1. is ucnv_convert(To|From)Algorithmic better?
1871 * 2. had we better use an explicit pivot buffer?
1872 * 3. error returned comes from 'fromUnicode' only even
1873 * when toUnicode is true !
1874 */
1875 if (toUnicode) {
1876 /* encoding => UTF-16 => UTF-8 */
1877 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1878 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1879 0, TRUE, &err);
1880 } else {
1881 /* UTF-8 => UTF-16 => encoding */
1882 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1883 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1884 0, TRUE, &err);
1885 }
1886 *inlen = ucv_in - (const char*) in;
1887 *outlen = ucv_out - (char *) out;
1888 if (U_SUCCESS(err))
1889 return 0;
1890 if (err == U_BUFFER_OVERFLOW_ERROR)
1891 return -1;
1892 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1893 return -2;
1894 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1895 return -3;
1896}
1897#endif /* LIBXML_ICU_ENABLED */
1898
1899/************************************************************************
1900 * *
Daniel Veillard97ac1312001-05-30 19:14:17 +00001901 * The real API used by libxml for on-the-fly conversion *
1902 * *
1903 ************************************************************************/
1904
Owen Taylor3473f882001-02-23 17:55:21 +00001905/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001906 * xmlCharEncFirstLineInt:
Owen Taylor3473f882001-02-23 17:55:21 +00001907 * @handler: char enconding transformation data structure
1908 * @out: an xmlBuffer for the output.
1909 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001910 * @len: number of bytes to convert for the first line, or -1
1911 *
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * Front-end for the encoding handler input function, but handle only
1913 * the very first line, i.e. limit itself to 45 chars.
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001914 *
1915 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001916 * -1 general error
1917 * -2 if the transcoding fails (for *in is not valid utf8 string or
1918 * the result of transformation can't fit into the encoding we want), or
1919 */
1920int
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001921xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1922 xmlBufferPtr in, int len) {
Owen Taylor3473f882001-02-23 17:55:21 +00001923 int ret = -2;
1924 int written;
1925 int toconv;
1926
1927 if (handler == NULL) return(-1);
1928 if (out == NULL) return(-1);
1929 if (in == NULL) return(-1);
1930
William M. Brack38d452a2007-05-22 16:00:06 +00001931 /* calculate space available */
Daniel Veillard69f04562011-08-19 11:05:04 +08001932 written = out->size - out->use - 1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00001933 toconv = in->use;
Owen Taylor3473f882001-02-23 17:55:21 +00001934 /*
1935 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1936 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001937 * declaration without going too far inside the document content.
Daniel Veillard57c9db02008-03-06 14:37:10 +00001938 * on UTF-16 this means 90bytes, on UCS4 this means 180
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001939 * The actual value depending on guessed encoding is passed as @len
1940 * if provided
Owen Taylor3473f882001-02-23 17:55:21 +00001941 */
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001942 if (len >= 0) {
1943 if (toconv > len)
1944 toconv = len;
1945 } else {
1946 if (toconv > 180)
1947 toconv = 180;
1948 }
William M. Brack38d452a2007-05-22 16:00:06 +00001949 if (toconv * 2 >= written) {
Daniel Veillard18d0db22012-07-13 19:51:15 +08001950 xmlBufferGrow(out, toconv * 2);
William M. Brack38d452a2007-05-22 16:00:06 +00001951 written = out->size - out->use - 1;
1952 }
Owen Taylor3473f882001-02-23 17:55:21 +00001953
1954 if (handler->input != NULL) {
1955 ret = handler->input(&out->content[out->use], &written,
1956 in->content, &toconv);
1957 xmlBufferShrink(in, toconv);
1958 out->use += written;
1959 out->content[out->use] = 0;
1960 }
1961#ifdef LIBXML_ICONV_ENABLED
1962 else if (handler->iconv_in != NULL) {
1963 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1964 &written, in->content, &toconv);
1965 xmlBufferShrink(in, toconv);
1966 out->use += written;
1967 out->content[out->use] = 0;
1968 if (ret == -1) ret = -3;
1969 }
1970#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001971#ifdef LIBXML_ICU_ENABLED
1972 else if (handler->uconv_in != NULL) {
1973 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1974 &written, in->content, &toconv);
1975 xmlBufferShrink(in, toconv);
1976 out->use += written;
1977 out->content[out->use] = 0;
1978 if (ret == -1) ret = -3;
1979 }
1980#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001981#ifdef DEBUG_ENCODING
1982 switch (ret) {
1983 case 0:
1984 xmlGenericError(xmlGenericErrorContext,
1985 "converted %d bytes to %d bytes of input\n",
1986 toconv, written);
1987 break;
1988 case -1:
1989 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1990 toconv, written, in->use);
1991 break;
1992 case -2:
1993 xmlGenericError(xmlGenericErrorContext,
1994 "input conversion failed due to input error\n");
1995 break;
1996 case -3:
1997 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1998 toconv, written, in->use);
1999 break;
2000 default:
2001 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2002 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002003#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002004 /*
2005 * Ignore when input buffer is not on a boundary
2006 */
2007 if (ret == -3) ret = 0;
2008 if (ret == -1) ret = 0;
2009 return(ret);
2010}
2011
2012/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002013 * xmlCharEncFirstLine:
2014 * @handler: char enconding transformation data structure
2015 * @out: an xmlBuffer for the output.
2016 * @in: an xmlBuffer for the input
2017 *
2018 * Front-end for the encoding handler input function, but handle only
2019 * the very first line, i.e. limit itself to 45 chars.
2020 *
2021 * Returns the number of byte written if success, or
2022 * -1 general error
2023 * -2 if the transcoding fails (for *in is not valid utf8 string or
2024 * the result of transformation can't fit into the encoding we want), or
2025 */
2026int
2027xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2028 xmlBufferPtr in) {
2029 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2030}
2031
2032/**
Daniel Veillard28cc42d2012-08-10 10:00:18 +08002033 * xmlCharEncFirstLineInput:
Daniel Veillard18d0db22012-07-13 19:51:15 +08002034 * @input: a parser input buffer
2035 * @len: number of bytes to convert for the first line, or -1
2036 *
2037 * Front-end for the encoding handler input function, but handle only
2038 * the very first line. Point is that this is based on autodetection
2039 * of the encoding and once that first line is converted we may find
2040 * out that a different decoder is needed to process the input.
2041 *
2042 * Returns the number of byte written if success, or
2043 * -1 general error
2044 * -2 if the transcoding fails (for *in is not valid utf8 string or
2045 * the result of transformation can't fit into the encoding we want), or
2046 */
2047int
2048xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2049{
2050 int ret = -2;
2051 size_t written;
2052 size_t toconv;
2053 int c_in;
2054 int c_out;
2055 xmlBufPtr in;
2056 xmlBufPtr out;
2057
2058 if ((input == NULL) || (input->encoder == NULL) ||
2059 (input->buffer == NULL) || (input->raw == NULL))
2060 return (-1);
2061 out = input->buffer;
2062 in = input->raw;
2063
2064 toconv = xmlBufUse(in);
2065 if (toconv == 0)
2066 return (0);
2067 written = xmlBufAvail(out) - 1; /* count '\0' */
2068 /*
2069 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2070 * 45 chars should be sufficient to reach the end of the encoding
2071 * declaration without going too far inside the document content.
2072 * on UTF-16 this means 90bytes, on UCS4 this means 180
2073 * The actual value depending on guessed encoding is passed as @len
2074 * if provided
2075 */
2076 if (len >= 0) {
2077 if (toconv > (unsigned int) len)
2078 toconv = len;
2079 } else {
2080 if (toconv > 180)
2081 toconv = 180;
2082 }
2083 if (toconv * 2 >= written) {
2084 xmlBufGrow(out, toconv * 2);
2085 written = xmlBufAvail(out) - 1;
2086 }
2087 if (written > 360)
2088 written = 360;
2089
2090 c_in = toconv;
2091 c_out = written;
2092 if (input->encoder->input != NULL) {
2093 ret = input->encoder->input(xmlBufEnd(out), &c_out,
2094 xmlBufContent(in), &c_in);
2095 xmlBufShrink(in, c_in);
2096 xmlBufAddLen(out, c_out);
2097 }
2098#ifdef LIBXML_ICONV_ENABLED
2099 else if (input->encoder->iconv_in != NULL) {
2100 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2101 &c_out, xmlBufContent(in), &c_in);
2102 xmlBufShrink(in, c_in);
2103 xmlBufAddLen(out, c_out);
2104 if (ret == -1)
2105 ret = -3;
2106 }
2107#endif /* LIBXML_ICONV_ENABLED */
2108#ifdef LIBXML_ICU_ENABLED
2109 else if (input->encoder->uconv_in != NULL) {
2110 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2111 &c_out, xmlBufContent(in), &c_in);
2112 xmlBufShrink(in, c_in);
2113 xmlBufAddLen(out, c_out);
2114 if (ret == -1)
2115 ret = -3;
2116 }
2117#endif /* LIBXML_ICU_ENABLED */
2118 switch (ret) {
2119 case 0:
2120#ifdef DEBUG_ENCODING
2121 xmlGenericError(xmlGenericErrorContext,
2122 "converted %d bytes to %d bytes of input\n",
2123 c_in, c_out);
2124#endif
2125 break;
2126 case -1:
2127#ifdef DEBUG_ENCODING
2128 xmlGenericError(xmlGenericErrorContext,
2129 "converted %d bytes to %d bytes of input, %d left\n",
2130 c_in, c_out, (int)xmlBufUse(in));
2131#endif
2132 break;
2133 case -3:
2134#ifdef DEBUG_ENCODING
2135 xmlGenericError(xmlGenericErrorContext,
2136 "converted %d bytes to %d bytes of input, %d left\n",
2137 c_in, c_out, (int)xmlBufUse(in));
2138#endif
2139 break;
2140 case -2: {
2141 char buf[50];
2142 const xmlChar *content = xmlBufContent(in);
2143
2144 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2145 content[0], content[1],
2146 content[2], content[3]);
2147 buf[49] = 0;
2148 xmlEncodingErr(XML_I18N_CONV_FAILED,
2149 "input conversion failed due to input error, bytes %s\n",
2150 buf);
2151 }
2152 }
2153 /*
2154 * Ignore when input buffer is not on a boundary
2155 */
2156 if (ret == -3) ret = 0;
2157 if (ret == -1) ret = 0;
2158 return(ret);
2159}
2160
2161/**
2162 * xmlCharEncInput:
2163 * @input: a parser input buffer
2164 *
2165 * Generic front-end for the encoding handler on parser input
2166 *
2167 * Returns the number of byte written if success, or
2168 * -1 general error
2169 * -2 if the transcoding fails (for *in is not valid utf8 string or
2170 * the result of transformation can't fit into the encoding we want), or
2171 */
2172int
2173xmlCharEncInput(xmlParserInputBufferPtr input)
2174{
2175 int ret = -2;
2176 size_t written;
2177 size_t toconv;
2178 int c_in;
2179 int c_out;
2180 xmlBufPtr in;
2181 xmlBufPtr out;
2182
2183 if ((input == NULL) || (input->encoder == NULL) ||
2184 (input->buffer == NULL) || (input->raw == NULL))
2185 return (-1);
2186 out = input->buffer;
2187 in = input->raw;
2188
2189 toconv = xmlBufUse(in);
2190 if (toconv == 0)
2191 return (0);
2192 if (toconv > 64 * 1024)
2193 toconv = 64 * 1024;
2194 written = xmlBufAvail(out);
2195 if (written > 0)
2196 written--; /* count '\0' */
2197 if (toconv * 2 >= written) {
2198 xmlBufGrow(out, toconv * 2);
2199 written = xmlBufAvail(out);
2200 if (written > 0)
2201 written--; /* count '\0' */
2202 }
2203 if (written > 128 * 1024)
2204 written = 128 * 1024;
2205
2206 c_in = toconv;
2207 c_out = written;
2208 if (input->encoder->input != NULL) {
2209 ret = input->encoder->input(xmlBufEnd(out), &c_out,
2210 xmlBufContent(in), &c_in);
2211 xmlBufShrink(in, c_in);
2212 xmlBufAddLen(out, c_out);
2213 }
2214#ifdef LIBXML_ICONV_ENABLED
2215 else if (input->encoder->iconv_in != NULL) {
2216 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2217 &c_out, xmlBufContent(in), &c_in);
2218 xmlBufShrink(in, c_in);
2219 xmlBufAddLen(out, c_out);
2220 if (ret == -1)
2221 ret = -3;
2222 }
2223#endif /* LIBXML_ICONV_ENABLED */
2224#ifdef LIBXML_ICU_ENABLED
2225 else if (input->encoder->uconv_in != NULL) {
2226 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2227 &c_out, xmlBufContent(in), &c_in);
2228 xmlBufShrink(in, c_in);
2229 xmlBufAddLen(out, c_out);
2230 if (ret == -1)
2231 ret = -3;
2232 }
2233#endif /* LIBXML_ICU_ENABLED */
2234 switch (ret) {
2235 case 0:
2236#ifdef DEBUG_ENCODING
2237 xmlGenericError(xmlGenericErrorContext,
2238 "converted %d bytes to %d bytes of input\n",
2239 c_in, c_out);
2240#endif
2241 break;
2242 case -1:
2243#ifdef DEBUG_ENCODING
2244 xmlGenericError(xmlGenericErrorContext,
2245 "converted %d bytes to %d bytes of input, %d left\n",
2246 c_in, c_out, (int)xmlBufUse(in));
2247#endif
2248 break;
2249 case -3:
2250#ifdef DEBUG_ENCODING
2251 xmlGenericError(xmlGenericErrorContext,
2252 "converted %d bytes to %d bytes of input, %d left\n",
2253 c_in, c_out, (int)xmlBufUse(in));
2254#endif
2255 break;
2256 case -2: {
2257 char buf[50];
2258 const xmlChar *content = xmlBufContent(in);
2259
2260 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2261 content[0], content[1],
2262 content[2], content[3]);
2263 buf[49] = 0;
2264 xmlEncodingErr(XML_I18N_CONV_FAILED,
2265 "input conversion failed due to input error, bytes %s\n",
2266 buf);
2267 }
2268 }
2269 /*
2270 * Ignore when input buffer is not on a boundary
2271 */
2272 if (ret == -3)
2273 ret = 0;
2274 return (c_out? c_out : ret);
2275}
2276
2277/**
Owen Taylor3473f882001-02-23 17:55:21 +00002278 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002279 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002280 * @out: an xmlBuffer for the output.
2281 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002282 *
Owen Taylor3473f882001-02-23 17:55:21 +00002283 * Generic front-end for the encoding handler input function
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002284 *
2285 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002286 * -1 general error
2287 * -2 if the transcoding fails (for *in is not valid utf8 string or
2288 * the result of transformation can't fit into the encoding we want), or
2289 */
2290int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002291xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2292 xmlBufferPtr in)
2293{
Owen Taylor3473f882001-02-23 17:55:21 +00002294 int ret = -2;
2295 int written;
2296 int toconv;
2297
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002298 if (handler == NULL)
2299 return (-1);
2300 if (out == NULL)
2301 return (-1);
2302 if (in == NULL)
2303 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002304
2305 toconv = in->use;
2306 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002307 return (0);
Daniel Veillard69f04562011-08-19 11:05:04 +08002308 written = out->size - out->use -1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00002309 if (toconv * 2 >= written) {
2310 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002311 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002312 }
2313 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002314 ret = handler->input(&out->content[out->use], &written,
2315 in->content, &toconv);
2316 xmlBufferShrink(in, toconv);
2317 out->use += written;
2318 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002319 }
2320#ifdef LIBXML_ICONV_ENABLED
2321 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002322 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2323 &written, in->content, &toconv);
2324 xmlBufferShrink(in, toconv);
2325 out->use += written;
2326 out->content[out->use] = 0;
2327 if (ret == -1)
2328 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002329 }
2330#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002331#ifdef LIBXML_ICU_ENABLED
2332 else if (handler->uconv_in != NULL) {
2333 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2334 &written, in->content, &toconv);
2335 xmlBufferShrink(in, toconv);
2336 out->use += written;
2337 out->content[out->use] = 0;
2338 if (ret == -1)
2339 ret = -3;
2340 }
2341#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002342 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002343 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002344#ifdef DEBUG_ENCODING
2345 xmlGenericError(xmlGenericErrorContext,
2346 "converted %d bytes to %d bytes of input\n",
2347 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002348#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002349 break;
2350 case -1:
2351#ifdef DEBUG_ENCODING
2352 xmlGenericError(xmlGenericErrorContext,
2353 "converted %d bytes to %d bytes of input, %d left\n",
2354 toconv, written, in->use);
2355#endif
2356 break;
2357 case -3:
2358#ifdef DEBUG_ENCODING
2359 xmlGenericError(xmlGenericErrorContext,
2360 "converted %d bytes to %d bytes of input, %d left\n",
2361 toconv, written, in->use);
2362#endif
2363 break;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002364 case -2: {
2365 char buf[50];
2366
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002367 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002368 in->content[0], in->content[1],
2369 in->content[2], in->content[3]);
2370 buf[49] = 0;
2371 xmlEncodingErr(XML_I18N_CONV_FAILED,
2372 "input conversion failed due to input error, bytes %s\n",
2373 buf);
2374 }
Owen Taylor3473f882001-02-23 17:55:21 +00002375 }
2376 /*
2377 * Ignore when input buffer is not on a boundary
2378 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002379 if (ret == -3)
2380 ret = 0;
Daniel Veillard2644ab22005-08-24 14:22:55 +00002381 return (written? written : ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002382}
2383
2384/**
Daniel Veillard18d0db22012-07-13 19:51:15 +08002385 * xmlCharEncOutput:
Daniel Veillard28cc42d2012-08-10 10:00:18 +08002386 * @output: a parser output buffer
Daniel Veillard18d0db22012-07-13 19:51:15 +08002387 * @init: is this an initialization call without data
2388 *
2389 * Generic front-end for the encoding handler on parser output
2390 * a first call with @init == 1 has to be made first to initiate the
2391 * output in case of non-stateless encoding needing to initiate their
2392 * state or the output (like the BOM in UTF16).
2393 * In case of UTF8 sequence conversion errors for the given encoder,
2394 * the content will be automatically remapped to a CharRef sequence.
2395 *
2396 * Returns the number of byte written if success, or
2397 * -1 general error
2398 * -2 if the transcoding fails (for *in is not valid utf8 string or
2399 * the result of transformation can't fit into the encoding we want), or
2400 */
2401int
2402xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2403{
2404 int ret = -2;
2405 size_t written;
2406 size_t writtentot = 0;
2407 size_t toconv;
2408 int c_in;
2409 int c_out;
2410 xmlBufPtr in;
2411 xmlBufPtr out;
2412 int charref_len = 0;
2413
2414 if ((output == NULL) || (output->encoder == NULL) ||
2415 (output->buffer == NULL) || (output->conv == NULL))
2416 return (-1);
2417 out = output->conv;
2418 in = output->buffer;
2419
2420retry:
2421
2422 written = xmlBufAvail(out);
2423 if (written > 0)
2424 written--; /* count '\0' */
2425
2426 /*
2427 * First specific handling of the initialization call
2428 */
2429 if (init) {
2430 c_in = 0;
2431 c_out = written;
2432 if (output->encoder->output != NULL) {
2433 ret = output->encoder->output(xmlBufEnd(out), &c_out,
2434 NULL, &c_in);
2435 if (ret > 0) /* Gennady: check return value */
2436 xmlBufAddLen(out, c_out);
2437 }
2438#ifdef LIBXML_ICONV_ENABLED
2439 else if (output->encoder->iconv_out != NULL) {
2440 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2441 &c_out, NULL, &c_in);
2442 xmlBufAddLen(out, c_out);
2443 }
2444#endif /* LIBXML_ICONV_ENABLED */
2445#ifdef LIBXML_ICU_ENABLED
2446 else if (output->encoder->uconv_out != NULL) {
2447 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2448 &c_out, NULL, &c_in);
2449 xmlBufAddLen(out, c_out);
2450 }
2451#endif /* LIBXML_ICU_ENABLED */
2452#ifdef DEBUG_ENCODING
2453 xmlGenericError(xmlGenericErrorContext,
2454 "initialized encoder\n");
2455#endif
2456 return(0);
2457 }
2458
2459 /*
2460 * Conversion itself.
2461 */
2462 toconv = xmlBufUse(in);
2463 if (toconv == 0)
2464 return (0);
2465 if (toconv > 64 * 1024)
2466 toconv = 64 * 1024;
2467 if (toconv * 4 >= written) {
2468 xmlBufGrow(out, toconv * 4);
2469 written = xmlBufAvail(out) - 1;
2470 }
2471 if (written > 256 * 1024)
2472 written = 256 * 1024;
2473
2474 c_in = toconv;
2475 c_out = written;
2476 if (output->encoder->output != NULL) {
2477 ret = output->encoder->output(xmlBufEnd(out), &c_out,
2478 xmlBufContent(in), &c_in);
2479 if (c_out > 0) {
2480 xmlBufShrink(in, c_in);
2481 xmlBufAddLen(out, c_out);
2482 writtentot += c_out;
2483 }
2484 }
2485#ifdef LIBXML_ICONV_ENABLED
2486 else if (output->encoder->iconv_out != NULL) {
2487 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2488 &c_out, xmlBufContent(in), &c_in);
2489 xmlBufShrink(in, c_in);
2490 xmlBufAddLen(out, c_out);
2491 writtentot += c_out;
2492 if (ret == -1) {
2493 if (c_out > 0) {
2494 /*
2495 * Can be a limitation of iconv
2496 */
2497 charref_len = 0;
2498 goto retry;
2499 }
2500 ret = -3;
2501 }
2502 }
2503#endif /* LIBXML_ICONV_ENABLED */
2504#ifdef LIBXML_ICU_ENABLED
2505 else if (output->encoder->uconv_out != NULL) {
2506 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2507 &c_out, xmlBufContent(in), &c_in);
2508 xmlBufShrink(in, c_in);
2509 xmlBufAddLen(out, c_out);
2510 writtentot += c_out;
2511 if (ret == -1) {
2512 if (c_out > 0) {
2513 /*
2514 * Can be a limitation of uconv
2515 */
2516 charref_len = 0;
2517 goto retry;
2518 }
2519 ret = -3;
2520 }
2521 }
2522#endif /* LIBXML_ICU_ENABLED */
2523 else {
2524 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2525 "xmlCharEncOutFunc: no output function !\n", NULL);
2526 return(-1);
2527 }
2528
2529 if (ret >= 0) output += ret;
2530
2531 /*
2532 * Attempt to handle error cases
2533 */
2534 switch (ret) {
2535 case 0:
2536#ifdef DEBUG_ENCODING
2537 xmlGenericError(xmlGenericErrorContext,
2538 "converted %d bytes to %d bytes of output\n",
2539 c_in, c_out);
2540#endif
2541 break;
2542 case -1:
2543#ifdef DEBUG_ENCODING
2544 xmlGenericError(xmlGenericErrorContext,
2545 "output conversion failed by lack of space\n");
2546#endif
2547 break;
2548 case -3:
2549#ifdef DEBUG_ENCODING
2550 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2551 c_in, c_out, (int) xmlBufUse(in));
2552#endif
2553 break;
2554 case -2: {
2555 int len = (int) xmlBufUse(in);
2556 xmlChar *content = xmlBufContent(in);
2557 int cur;
2558
2559 cur = xmlGetUTF8Char(content, &len);
2560 if ((charref_len != 0) && (c_out < charref_len)) {
2561 /*
2562 * We attempted to insert a character reference and failed.
2563 * Undo what was written and skip the remaining charref.
2564 */
2565 xmlBufErase(out, c_out);
2566 writtentot -= c_out;
2567 xmlBufShrink(in, charref_len - c_out);
2568 charref_len = 0;
2569
2570 ret = -1;
2571 break;
2572 } else if (cur > 0) {
2573 xmlChar charref[20];
2574
2575#ifdef DEBUG_ENCODING
2576 xmlGenericError(xmlGenericErrorContext,
2577 "handling output conversion error\n");
2578 xmlGenericError(xmlGenericErrorContext,
2579 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2580 content[0], content[1],
2581 content[2], content[3]);
2582#endif
2583 /*
2584 * Removes the UTF8 sequence, and replace it by a charref
2585 * and continue the transcoding phase, hoping the error
2586 * did not mangle the encoder state.
2587 */
2588 charref_len = snprintf((char *) &charref[0], sizeof(charref),
2589 "&#%d;", cur);
2590 xmlBufShrink(in, len);
2591 xmlBufAddHead(in, charref, -1);
2592
2593 goto retry;
2594 } else {
2595 char buf[50];
2596
2597 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2598 content[0], content[1],
2599 content[2], content[3]);
2600 buf[49] = 0;
2601 xmlEncodingErr(XML_I18N_CONV_FAILED,
2602 "output conversion failed due to conv error, bytes %s\n",
2603 buf);
2604 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2605 content[0] = ' ';
2606 }
2607 break;
2608 }
2609 }
2610 return(ret);
2611}
2612
2613/**
Owen Taylor3473f882001-02-23 17:55:21 +00002614 * xmlCharEncOutFunc:
2615 * @handler: char enconding transformation data structure
2616 * @out: an xmlBuffer for the output.
2617 * @in: an xmlBuffer for the input
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002618 *
Owen Taylor3473f882001-02-23 17:55:21 +00002619 * Generic front-end for the encoding handler output function
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002620 * a first call with @in == NULL has to be made firs to initiate the
Owen Taylor3473f882001-02-23 17:55:21 +00002621 * output in case of non-stateless encoding needing to initiate their
2622 * state or the output (like the BOM in UTF16).
2623 * In case of UTF8 sequence conversion errors for the given encoder,
2624 * the content will be automatically remapped to a CharRef sequence.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002625 *
2626 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002627 * -1 general error
2628 * -2 if the transcoding fails (for *in is not valid utf8 string or
2629 * the result of transformation can't fit into the encoding we want), or
2630 */
2631int
2632xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2633 xmlBufferPtr in) {
2634 int ret = -2;
2635 int written;
2636 int writtentot = 0;
2637 int toconv;
2638 int output = 0;
Timothy Elliott689408b2012-05-08 22:03:22 +08002639 int charref_len = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002640
2641 if (handler == NULL) return(-1);
2642 if (out == NULL) return(-1);
2643
2644retry:
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002645
Owen Taylor3473f882001-02-23 17:55:21 +00002646 written = out->size - out->use;
2647
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002648 if (written > 0)
2649 written--; /* Gennady: count '/0' */
2650
Owen Taylor3473f882001-02-23 17:55:21 +00002651 /*
2652 * First specific handling of in = NULL, i.e. the initialization call
2653 */
2654 if (in == NULL) {
2655 toconv = 0;
2656 if (handler->output != NULL) {
2657 ret = handler->output(&out->content[out->use], &written,
2658 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002659 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002660 out->use += written;
2661 out->content[out->use] = 0;
2662 }
Owen Taylor3473f882001-02-23 17:55:21 +00002663 }
2664#ifdef LIBXML_ICONV_ENABLED
2665 else if (handler->iconv_out != NULL) {
2666 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2667 &written, NULL, &toconv);
2668 out->use += written;
2669 out->content[out->use] = 0;
2670 }
2671#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002672#ifdef LIBXML_ICU_ENABLED
2673 else if (handler->uconv_out != NULL) {
2674 ret = xmlUconvWrapper(handler->uconv_out, 0,
2675 &out->content[out->use],
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002676 &written, NULL, &toconv);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002677 out->use += written;
2678 out->content[out->use] = 0;
2679 }
2680#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002681#ifdef DEBUG_ENCODING
2682 xmlGenericError(xmlGenericErrorContext,
2683 "initialized encoder\n");
2684#endif
2685 return(0);
2686 }
2687
2688 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002689 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002690 */
2691 toconv = in->use;
2692 if (toconv == 0)
2693 return(0);
Daniel Veillardf1245392008-04-03 09:46:34 +00002694 if (toconv * 4 >= written) {
2695 xmlBufferGrow(out, toconv * 4);
Owen Taylor3473f882001-02-23 17:55:21 +00002696 written = out->size - out->use - 1;
2697 }
2698 if (handler->output != NULL) {
2699 ret = handler->output(&out->content[out->use], &written,
2700 in->content, &toconv);
Daniel Veillarde83e93e2008-08-30 12:52:26 +00002701 if (written > 0) {
2702 xmlBufferShrink(in, toconv);
2703 out->use += written;
2704 writtentot += written;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002705 }
Owen Taylor3473f882001-02-23 17:55:21 +00002706 out->content[out->use] = 0;
2707 }
2708#ifdef LIBXML_ICONV_ENABLED
2709 else if (handler->iconv_out != NULL) {
2710 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2711 &written, in->content, &toconv);
2712 xmlBufferShrink(in, toconv);
2713 out->use += written;
2714 writtentot += written;
2715 out->content[out->use] = 0;
2716 if (ret == -1) {
2717 if (written > 0) {
2718 /*
2719 * Can be a limitation of iconv
2720 */
Timothy Elliott689408b2012-05-08 22:03:22 +08002721 charref_len = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 goto retry;
2723 }
2724 ret = -3;
2725 }
2726 }
2727#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002728#ifdef LIBXML_ICU_ENABLED
2729 else if (handler->uconv_out != NULL) {
2730 ret = xmlUconvWrapper(handler->uconv_out, 0,
2731 &out->content[out->use],
2732 &written, in->content, &toconv);
2733 xmlBufferShrink(in, toconv);
2734 out->use += written;
2735 writtentot += written;
2736 out->content[out->use] = 0;
2737 if (ret == -1) {
2738 if (written > 0) {
2739 /*
2740 * Can be a limitation of iconv
2741 */
Timothy Elliott689408b2012-05-08 22:03:22 +08002742 charref_len = 0;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002743 goto retry;
2744 }
2745 ret = -3;
2746 }
2747 }
2748#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002749 else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002750 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2751 "xmlCharEncOutFunc: no output function !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002752 return(-1);
2753 }
2754
2755 if (ret >= 0) output += ret;
2756
2757 /*
2758 * Attempt to handle error cases
2759 */
2760 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002761 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002762#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002763 xmlGenericError(xmlGenericErrorContext,
2764 "converted %d bytes to %d bytes of output\n",
2765 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002766#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002767 break;
2768 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002769#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002770 xmlGenericError(xmlGenericErrorContext,
2771 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002772#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002773 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002775#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002776 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2777 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002778#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002779 break;
2780 case -2: {
2781 int len = in->use;
2782 const xmlChar *utf = (const xmlChar *) in->content;
2783 int cur;
2784
2785 cur = xmlGetUTF8Char(utf, &len);
Timothy Elliott689408b2012-05-08 22:03:22 +08002786 if ((charref_len != 0) && (written < charref_len)) {
2787 /*
2788 * We attempted to insert a character reference and failed.
2789 * Undo what was written and skip the remaining charref.
2790 */
2791 out->use -= written;
2792 writtentot -= written;
2793 xmlBufferShrink(in, charref_len - written);
2794 charref_len = 0;
2795
2796 ret = -1;
2797 break;
2798 } else if (cur > 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 xmlChar charref[20];
2800
2801#ifdef DEBUG_ENCODING
2802 xmlGenericError(xmlGenericErrorContext,
2803 "handling output conversion error\n");
2804 xmlGenericError(xmlGenericErrorContext,
2805 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2806 in->content[0], in->content[1],
2807 in->content[2], in->content[3]);
2808#endif
2809 /*
2810 * Removes the UTF8 sequence, and replace it by a charref
2811 * and continue the transcoding phase, hoping the error
2812 * did not mangle the encoder state.
2813 */
Timothy Elliott689408b2012-05-08 22:03:22 +08002814 charref_len = snprintf((char *) &charref[0], sizeof(charref),
2815 "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002816 xmlBufferShrink(in, len);
2817 xmlBufferAddHead(in, charref, -1);
2818
2819 goto retry;
2820 } else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002821 char buf[50];
2822
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002823 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002824 in->content[0], in->content[1],
2825 in->content[2], in->content[3]);
2826 buf[49] = 0;
2827 xmlEncodingErr(XML_I18N_CONV_FAILED,
2828 "output conversion failed due to conv error, bytes %s\n",
2829 buf);
Daniel Veillarddf750622006-05-02 12:24:06 +00002830 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2831 in->content[0] = ' ';
Owen Taylor3473f882001-02-23 17:55:21 +00002832 }
2833 break;
2834 }
2835 }
2836 return(ret);
2837}
2838
2839/**
2840 * xmlCharEncCloseFunc:
2841 * @handler: char enconding transformation data structure
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002842 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002843 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002844 *
2845 * Returns 0 if success, or -1 in case of error
2846 */
2847int
2848xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2849 int ret = 0;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002850 int tofree = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002851 if (handler == NULL) return(-1);
2852 if (handler->name == NULL) return(-1);
2853#ifdef LIBXML_ICONV_ENABLED
2854 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002855 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002856 * and the associated icon resources.
2857 */
2858 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002859 tofree = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (handler->iconv_out != NULL) {
2861 if (iconv_close(handler->iconv_out))
2862 ret = -1;
2863 handler->iconv_out = NULL;
2864 }
2865 if (handler->iconv_in != NULL) {
2866 if (iconv_close(handler->iconv_in))
2867 ret = -1;
2868 handler->iconv_in = NULL;
2869 }
Owen Taylor3473f882001-02-23 17:55:21 +00002870 }
2871#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002872#ifdef LIBXML_ICU_ENABLED
2873 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2874 tofree = 1;
2875 if (handler->uconv_out != NULL) {
2876 closeIcuConverter(handler->uconv_out);
2877 handler->uconv_out = NULL;
2878 }
2879 if (handler->uconv_in != NULL) {
2880 closeIcuConverter(handler->uconv_in);
2881 handler->uconv_in = NULL;
2882 }
2883 }
2884#endif
2885 if (tofree) {
2886 /* free up only dynamic handlers iconv/uconv */
2887 if (handler->name != NULL)
2888 xmlFree(handler->name);
2889 handler->name = NULL;
2890 xmlFree(handler);
2891 }
Owen Taylor3473f882001-02-23 17:55:21 +00002892#ifdef DEBUG_ENCODING
2893 if (ret)
2894 xmlGenericError(xmlGenericErrorContext,
2895 "failed to close the encoding handler\n");
2896 else
2897 xmlGenericError(xmlGenericErrorContext,
2898 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002899#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002900
Owen Taylor3473f882001-02-23 17:55:21 +00002901 return(ret);
2902}
2903
Daniel Veillard36711902004-02-11 13:25:26 +00002904/**
2905 * xmlByteConsumed:
2906 * @ctxt: an XML parser context
2907 *
2908 * This function provides the current index of the parser relative
2909 * to the start of the current entity. This function is computed in
2910 * bytes from the beginning starting at zero and finishing at the
2911 * size in byte of the file if parsing a file. The function is
2912 * of constant cost if the input is UTF-8 but can be costly if run
2913 * on non-UTF-8 input.
2914 *
2915 * Returns the index in bytes from the beginning of the entity or -1
2916 * in case the index could not be computed.
2917 */
2918long
2919xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2920 xmlParserInputPtr in;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002921
Daniel Veillard36711902004-02-11 13:25:26 +00002922 if (ctxt == NULL) return(-1);
2923 in = ctxt->input;
2924 if (in == NULL) return(-1);
2925 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2926 unsigned int unused = 0;
2927 xmlCharEncodingHandler * handler = in->buf->encoder;
2928 /*
2929 * Encoding conversion, compute the number of unused original
2930 * bytes from the input not consumed and substract that from
2931 * the raw consumed value, this is not a cheap operation
2932 */
2933 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002934 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002935 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002936 int toconv = in->end - in->cur, written = 32000;
2937
2938 int ret;
2939
2940 if (handler->output != NULL) {
2941 do {
2942 toconv = in->end - cur;
2943 written = 32000;
2944 ret = handler->output(&convbuf[0], &written,
2945 cur, &toconv);
2946 if (ret == -1) return(-1);
2947 unused += written;
2948 cur += toconv;
2949 } while (ret == -2);
2950#ifdef LIBXML_ICONV_ENABLED
2951 } else if (handler->iconv_out != NULL) {
2952 do {
2953 toconv = in->end - cur;
2954 written = 32000;
2955 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2956 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002957 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002958 if (written > 0)
2959 ret = -2;
2960 else
2961 return(-1);
2962 }
2963 unused += written;
2964 cur += toconv;
2965 } while (ret == -2);
2966#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002967#ifdef LIBXML_ICU_ENABLED
2968 } else if (handler->uconv_out != NULL) {
2969 do {
2970 toconv = in->end - cur;
2971 written = 32000;
2972 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2973 &written, cur, &toconv);
2974 if (ret < 0) {
2975 if (written > 0)
2976 ret = -2;
2977 else
2978 return(-1);
2979 }
2980 unused += written;
2981 cur += toconv;
2982 } while (ret == -2);
2983#endif
Daniel Veillard36711902004-02-11 13:25:26 +00002984 } else {
2985 /* could not find a converter */
2986 return(-1);
2987 }
2988 }
2989 if (in->buf->rawconsumed < unused)
2990 return(-1);
2991 return(in->buf->rawconsumed - unused);
2992 }
2993 return(in->consumed + (in->cur - in->base));
2994}
2995
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002996#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002997#ifdef LIBXML_ISO8859X_ENABLED
2998
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops once @out is full.
 *
 * @xlattable starts with 48 index bytes (32 for the two-byte lead bytes,
 * 16 for the three-byte lead bytes) followed by blocks of 64 entries,
 * a 0 entry meaning that the character is not part of the target set.
 *
 * Returns the number of bytes written if success (0 for the @in == NULL
 *     initialization call), -2 if the transcoding fails, -3 if the input
 *     ends in the middle of a sequence, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 *     by the characters which were successfully converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* each round emits exactly one byte, so one free slot is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* the whole character went through, commit it */
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3117
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the Unicode code points of the bytes 0x80..0xFF,
 *                 a 0 entry meaning an undefined code point
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops when @out cannot be guaranteed
 * to hold the next character.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /* a non ASCII byte expands to at most 3 bytes, keep that much room */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than the output can still take */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* check the bound first so that *in is never read past the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * At most 2 output bytes are left at this point if input remains,
     * they can still be used for plain ASCII.
     */
    while ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3183
Daniel Veillard1cc912e2010-11-03 19:26:35 +01003184
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003185/************************************************************************
3186 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3187 ************************************************************************/
3188
/*
 * ISO-8859-2 to Unicode table: entry i holds the code point associated
 * to the byte 0x80 + i, which is the layout ISO8859xToUTF8() reads through
 * unicodetable[*in - 0x80] (a 0 entry would mean an undefined code point).
 * NOTE(review): presumably handed to ISO8859xToUTF8() by a wrapper defined
 * outside of this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3207
/*
 * UTF-8 to ISO-8859-2 two-level table in the layout UTF8ToISO8859x()
 * indexes: 48 leading index bytes (32 for the two-byte lead bytes, then
 * 16 for the three-byte lead bytes) followed here by 6 blocks of 64
 * entries selected through those indexes; a 0 entry makes the lookup
 * report the character as not being part of the set.
 * NOTE(review): presumably handed to UTF8ToISO8859x() by a wrapper defined
 * outside of this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3237
/*
 * ISO-8859-3 to Unicode table: entry i holds the code point associated
 * to the byte 0x80 + i, which is the layout ISO8859xToUTF8() reads through
 * unicodetable[*in - 0x80]. The 0x0000 entries are the values that
 * function rejects as undefined code points.
 * NOTE(review): presumably handed to ISO8859xToUTF8() by a wrapper defined
 * outside of this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3256
/*
 * UTF-8 to ISO-8859-3 two-level table in the layout UTF8ToISO8859x()
 * indexes: 48 leading index bytes (32 for the two-byte lead bytes, then
 * 16 for the three-byte lead bytes) followed here by 7 blocks of 64
 * entries selected through those indexes; a 0 entry makes the lookup
 * report the character as not being part of the set.
 * NOTE(review): presumably handed to UTF8ToISO8859x() by a wrapper defined
 * outside of this part of the file - confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3290
/*
 * ISO-8859-4 to Unicode table: entry i holds the code point associated
 * to the byte 0x80 + i, which is the layout ISO8859xToUTF8() reads through
 * unicodetable[*in - 0x80] (a 0 entry would mean an undefined code point).
 * NOTE(review): presumably handed to ISO8859xToUTF8() by a wrapper defined
 * outside of this part of the file - confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3309
3310static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3311 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3319 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3320 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3321 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3322 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3323 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3325 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3326 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3327 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3328 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3329 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3335 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3336 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3337 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3338};
3339
3340static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003341 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3342 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3343 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3344 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3345 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3346 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3347 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3348 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3349 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3350 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3351 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3352 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3353 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3354 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3355 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3356 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003357};
3358
3359static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3360 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3368 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3369 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3372 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3373 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3374 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3375 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3376 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387};
3388
3389static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003390 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3391 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3392 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3393 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3394 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3395 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3396 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3397 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3398 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3399 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3400 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3401 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3402 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3403 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3404 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3405 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003406};
3407
3408static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3409 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3417 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3418 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3419 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3426 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3427 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3428 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3429 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432};
3433
3434static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003435 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3436 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3437 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3438 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3439 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3440 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3441 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3442 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3443 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3444 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3445 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3446 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3447 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3448 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3449 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3450 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003451};
3452
3453static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3454 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3462 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3463 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3464 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3465 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3478 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3479 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3480 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3481 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3482 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485};
3486
3487static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003488 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3489 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3490 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3491 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3492 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3493 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3494 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3495 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3496 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3497 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3498 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3499 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3500 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3501 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3502 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3503 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003504};
3505
3506static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3507 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3515 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3516 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3517 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3518 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3523 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3524 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3531 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3532 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3536 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538};
3539
3540static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003541 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3542 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3543 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3544 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3545 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3546 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3547 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3548 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3549 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3550 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3551 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3552 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3553 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3554 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3555 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3556 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003557};
3558
3559static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3560 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3568 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3569 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3570 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3571 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3572 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3573 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3574 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3577 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3581 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583};
3584
3585static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003586 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3587 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3588 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3589 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3590 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3591 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3592 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3593 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3594 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3595 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3596 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3597 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3598 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3599 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3600 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3601 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003602};
3603
3604static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3605 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3606 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3613 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3614 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3615 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3616 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3617 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3618 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3619 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3620 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3621 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3623 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3624 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3633 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3634 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3635 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3636};
3637
3638static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003639 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3640 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3641 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3642 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3643 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3644 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3645 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3646 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3647 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3648 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3649 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3650 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3651 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3652 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3653 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3654 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003655};
3656
3657static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3658 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3666 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3667 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3673 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3674 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3675 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3676 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3677 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3682 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685};
3686
3687static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003688 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3689 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3690 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3691 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3692 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3693 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3694 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3695 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3696 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3697 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3698 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3699 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3700 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3701 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3702 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3703 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003704};
3705
3706static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3707 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3709 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3715 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3716 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3717 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3718 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3727 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3728 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3729 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3730 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3731 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3732 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3733 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3734 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3735 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3736 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3737 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3738};
3739
3740static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003741 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3742 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3743 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3744 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3745 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3746 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3747 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3748 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3749 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3750 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3751 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3752 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3753 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3754 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3755 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3756 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003757};
3758
/*
 * xmltranscodetable_ISO8859_14:
 * Auto-generated lookup data that UTF8ToISO8859_14() hands to
 * UTF8ToISO8859x().  Per the declared size it consists of a 48-byte
 * index area followed by ten 64-byte blocks.
 *
 * NOTE(review): the lookup scheme itself lives in UTF8ToISO8859x(), not
 * here; the index/block labels below are derived from the size
 * expression and the data layout only - confirm against that function.
 *
 * The initializer is exactly as long as the array, so no terminating
 * NUL is stored (valid in C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    /* index area: bytes 0..47 */
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    /* block 3 */
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    /* block 8 */
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 9 */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3804
/*
 * xmlunicodetable_ISO8859_15:
 * Auto-generated table of 128 16-bit values that ISO8859_15ToUTF8()
 * hands to ISO8859xToUTF8().
 *
 * The first 32 entries are the identity run 0x0080..0x009f, which
 * indicates the table is indexed by (byte - 0x80) and yields the
 * code point for that byte - confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3823
/*
 * xmltranscodetable_ISO8859_15:
 * Auto-generated lookup data that UTF8ToISO8859_15() hands to
 * UTF8ToISO8859x().  Per the declared size it consists of a 48-byte
 * index area followed by six 64-byte blocks.
 *
 * NOTE(review): the lookup scheme itself lives in UTF8ToISO8859x(), not
 * here; the index/block labels below are derived from the size
 * expression and the data layout only - confirm against that function.
 *
 * The initializer is exactly as long as the array, so no terminating
 * NUL is stored (valid in C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    /* index area: bytes 0..47 */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* block 2 */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* block 5 */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3853
/*
 * xmlunicodetable_ISO8859_16:
 * Auto-generated table of 128 16-bit values that ISO8859_16ToUTF8()
 * hands to ISO8859xToUTF8().
 *
 * The first 32 entries are the identity run 0x0080..0x009f, which
 * indicates the table is indexed by (byte - 0x80) and yields the
 * code point for that byte - confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3872
/*
 * xmltranscodetable_ISO8859_16:
 * Auto-generated lookup data that UTF8ToISO8859_16() hands to
 * UTF8ToISO8859x().  Per the declared size it consists of a 48-byte
 * index area followed by nine 64-byte blocks.
 *
 * NOTE(review): the lookup scheme itself lives in UTF8ToISO8859x(), not
 * here; the index/block labels below are derived from the size
 * expression and the data layout only - confirm against that function.
 *
 * The initializer is exactly as long as the array, so no terminating
 * NUL is stored (valid in C, rejected by C++).
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* index area: bytes 0..47 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* block 4 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 8 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3914
3915
/*
 * Auto-generated wrapper functions for ISO-8859-2 .. ISO-8859-16
 * (no wrappers are generated for part 12).
 */
3919
3920static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3921 const unsigned char* in, int *inlen) {
3922 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3923}
3924static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3925 const unsigned char* in, int *inlen) {
3926 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3927}
3928
3929static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3930 const unsigned char* in, int *inlen) {
3931 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3932}
3933static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3934 const unsigned char* in, int *inlen) {
3935 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3936}
3937
3938static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3939 const unsigned char* in, int *inlen) {
3940 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3941}
3942static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3943 const unsigned char* in, int *inlen) {
3944 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3945}
3946
3947static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3948 const unsigned char* in, int *inlen) {
3949 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3950}
3951static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3952 const unsigned char* in, int *inlen) {
3953 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3954}
3955
3956static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3957 const unsigned char* in, int *inlen) {
3958 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3959}
3960static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3961 const unsigned char* in, int *inlen) {
3962 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3963}
3964
3965static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3966 const unsigned char* in, int *inlen) {
3967 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3968}
3969static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3970 const unsigned char* in, int *inlen) {
3971 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3972}
3973
3974static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3975 const unsigned char* in, int *inlen) {
3976 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3977}
3978static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3979 const unsigned char* in, int *inlen) {
3980 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3981}
3982
3983static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3984 const unsigned char* in, int *inlen) {
3985 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3986}
3987static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3988 const unsigned char* in, int *inlen) {
3989 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3990}
3991
3992static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3993 const unsigned char* in, int *inlen) {
3994 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3995}
3996static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3997 const unsigned char* in, int *inlen) {
3998 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3999}
4000
4001static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
4002 const unsigned char* in, int *inlen) {
4003 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
4004}
4005static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
4006 const unsigned char* in, int *inlen) {
4007 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
4008}
4009
4010static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
4011 const unsigned char* in, int *inlen) {
4012 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
4013}
4014static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
4015 const unsigned char* in, int *inlen) {
4016 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
4017}
4018
4019static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
4020 const unsigned char* in, int *inlen) {
4021 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
4022}
4023static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
4024 const unsigned char* in, int *inlen) {
4025 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
4026}
4027
4028static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
4029 const unsigned char* in, int *inlen) {
4030 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
4031}
4032static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
4033 const unsigned char* in, int *inlen) {
4034 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
4035}
4036
4037static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
4038 const unsigned char* in, int *inlen) {
4039 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
4040}
4041static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
4042 const unsigned char* in, int *inlen) {
4043 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
4044}
4045
4046static void
4047xmlRegisterCharEncodingHandlersISO8859x (void) {
4048 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
4049 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
4050 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
4051 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
4052 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
4053 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
4054 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
4055 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
4056 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
4057 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
4058 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
4059 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
4060 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
4061 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
4062}
4063
4064#endif
4065#endif
4066
Daniel Veillard5d4644e2005-04-01 13:11:58 +00004067#define bottom_encoding
4068#include "elfgcchack.h"