blob: e49c7f898fcdf1d1625a8c4c95e9e01af80b8ea0 [file] [log] [blame]
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
Daniel Veillard18d0db22012-07-13 19:51:15 +080027#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
29#ifdef HAVE_CTYPE_H
30#include <ctype.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef LIBXML_ICONV_ENABLED
36#ifdef HAVE_ERRNO_H
37#include <errno.h>
38#endif
39#endif
40#include <libxml/encoding.h>
41#include <libxml/xmlmemory.h>
42#ifdef LIBXML_HTML_ENABLED
43#include <libxml/HTMLparser.h>
44#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000045#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000046#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047
Daniel Veillard18d0db22012-07-13 19:51:15 +080048#include "buf.h"
49#include "enc.h"
50
Daniel Veillard22090732001-07-16 00:06:07 +000051static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000053
54typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56struct _xmlCharEncodingAlias {
57 const char *name;
58 const char *alias;
59};
60
61static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62static int xmlCharEncodingAliasesNb = 0;
63static int xmlCharEncodingAliasesMax = 0;
64
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +010065#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
Owen Taylor3473f882001-02-23 17:55:21 +000066#if 0
67#define DEBUG_ENCODING /* Define this to get encoding traces */
68#endif
William M. Brack16db7b62003-08-07 13:12:49 +000069#else
70#ifdef LIBXML_ISO8859X_ENABLED
71static void xmlRegisterCharEncodingHandlersISO8859x (void);
72#endif
Owen Taylor3473f882001-02-23 17:55:21 +000073#endif
74
75static int xmlLittleEndian = 1;
76
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000077/**
78 * xmlEncodingErrMemory:
79 * @extra: extra informations
80 *
81 * Handle an out of memory condition
82 */
83static void
84xmlEncodingErrMemory(const char *extra)
85{
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87}
88
89/**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
93 *
94 * n encoding error
95 */
Xin Li28c53d32017-03-07 00:33:02 +000096static void LIBXML_ATTR_FORMAT(2,0)
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000097xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98{
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102}
Daniel Veillard97ac1312001-05-30 19:14:17 +0000103
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100104#ifdef LIBXML_ICU_ENABLED
105static uconv_t*
106openIcuConverter(const char* name, int toUnicode)
107{
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
112
113 conv->uconv = ucnv_open(name, &status);
114 if (U_FAILURE(status))
115 goto error;
116
117 status = U_ZERO_ERROR;
118 if (toUnicode) {
119 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
121 }
122 else {
123 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124 NULL, NULL, NULL, &status);
125 }
126 if (U_FAILURE(status))
127 goto error;
128
129 status = U_ZERO_ERROR;
130 conv->utf8 = ucnv_open("UTF-8", &status);
131 if (U_SUCCESS(status))
132 return conv;
133
134error:
135 if (conv->uconv)
136 ucnv_close(conv->uconv);
137 xmlFree(conv);
138 return NULL;
139}
140
141static void
142closeIcuConverter(uconv_t *conv)
143{
144 if (conv != NULL) {
145 ucnv_close(conv->uconv);
146 ucnv_close(conv->utf8);
147 xmlFree(conv);
148 }
149}
150#endif /* LIBXML_ICU_ENABLED */
151
Daniel Veillard97ac1312001-05-30 19:14:17 +0000152/************************************************************************
153 * *
154 * Conversions To/From UTF8 encoding *
155 * *
156 ************************************************************************/
157
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII maps one-to-one onto UTF-8, so the whole
 * output buffer can be used.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     outside the ASCII range is met or if @out, @outlen or @inlen is NULL.
 *     A NULL @in is treated as empty input and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are updated on the -1 (non-ASCII) path too, describing the part
 * converted before the offending byte.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outend = out + *outlen;
    inend = in + *inlen;

    /* Stop on the first of: input exhausted, output full, non-ASCII byte. */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }

    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
202
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000203#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL. A NULL @in returns 0.
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen the number of octets produced; on -2 both
 * describe the part converted before the offending character.
 * A multi-byte sequence truncated at the end of @in is left unconsumed.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }

        /* incomplete sequence: leave it for the next call */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte: report the error instead of
                 * emitting the partially assembled value.
                 */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000287#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000288
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, bytes from 0x80 up
 * become a two-byte UTF-8 sequence.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    const unsigned char *src = in;
    unsigned char *dstLimit;
    const unsigned char *srcLimit;
    const unsigned char *asciiLimit;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstLimit = out + *outlen;
    srcLimit = in + (*inlen);
    asciiLimit = srcLimit;

    /* Main loop needs room for a two-byte sequence. */
    while ((src < srcLimit) && (dst < dstLimit - 1)) {
        if (*src >= 0x80) {
            dst[0] = (((*src) >> 6) & 0x1F) | 0xC0;
            dst[1] = ((*src) & 0x3F) | 0x80;
            dst += 2;
            src++;
        }
        /* Never copy more ASCII bytes than the output can still hold. */
        if ((asciiLimit - src) > (dstLimit - dst))
            asciiLimit = src + (dstLimit - dst);
        for ( ; (src < asciiLimit) && (*src < 0x80); src++)
            *dst++ = *src;
    }

    /* A single byte of room left can still take one ASCII char. */
    if ((src < srcLimit) && (dst < dstLimit) && (*src < 0x80))
        *dst++ = *src++;

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
337
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the computed length is negative.
 * The values of *@outlen and *@inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (!out || !inb || !outlen || !inlenb)
        return(-1);

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
373
Daniel Veillarde72c7562002-05-31 09:47:30 +0000374
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000375#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL. A NULL @in returns 0.
 * The value of @inlen after return is the number of octets consumed,
 * the value of @outlen the number of octets produced; on -2 both
 * describe the part converted before the offending character.
 * A multi-byte sequence truncated at the end of @in is left unconsumed.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *consumed = in;
    const unsigned char *srcEnd;
    unsigned char *dst = out;
    const unsigned char *dstEnd;
    unsigned int code, byte;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization: nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcEnd = in + (*inlen);
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        byte = *src++;
        if (byte < 0x80) {
            code = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (byte < 0xE0) {
            code = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            code = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            code = byte & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            goto invalid;
        }

        /* incomplete sequence: leave it for the next call */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80)
                goto invalid;
            code = (code << 6) | (byte & 0x3F);
            extra--;
        }

        /* assertion: code is a single UCS-4 value */
        if (code > 0xFF)
            goto invalid;       /* no chance for this in IsoLat1 */
        if (dst >= dstEnd)
            break;
        *dst++ = code;
        consumed = src;
    }

    *outlen = dst - out;
    *inlen = consumed - in;
    return(*outlen);

invalid:
    *outlen = dst - out;
    *inlen = consumed - in;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000465#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000466
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the byte
 * order of the host.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed,
 * the value of *@outlen the number of octets produced. A character is
 * only consumed when its whole UTF-8 form fits in @out; a trailing odd
 * byte or a high surrogate at the very end of @inb is left unconsumed.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned char* outend;
    unsigned int c, d;
    int bits, needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /* The length is even here, so two bytes are available when in < inend. */
    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split across chunks: wait for more input */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) needed = 1;
        else if (c < 0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else needed = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = c;
        } else {
            if (c < 0x800) { *out++ = ((c >> 6) & 0x1F) | 0xC0; bits = 0; }
            else if (c < 0x10000) { *out++ = ((c >> 12) & 0x0F) | 0xE0; bits = 6; }
            else { *out++ = ((c >> 18) & 0x07) | 0xF0; bits = 12; }

            for ( ; bits >= 0; bits -= 6)
                *out++ = ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
554
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000555#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     of 0x110000 and above). A NULL @in returns 0.
 * The value of *@inlen after return is the number of octets consumed,
 * the value of *@outlen the number of octets produced. A sequence
 * truncated at the end of @in, or a character that does not fit in
 * @outb, is left unconsumed.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are usable */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }

        /* incomplete sequence: leave it for the next call */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* not a continuation byte: do not emit a partial value */
                *outlen = out - outb;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        else {
            /* beyond the range UTF-16 can represent */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);
}
664
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for initialization
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. When @in is NULL the call is an initialization:
 * the bytes 0xFF 0xFE are written if @outb has room for them.
 * Otherwise the work is delegated to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or the value returned by UTF8ToUTF16LE().
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* checked here because the initialization path dereferences them */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
William M. Brack030a7a12004-02-10 12:48:57 +0000703#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000704
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the byte
 * order of the host.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed,
 * the value of *@outlen the number of octets produced. A character is
 * only consumed when its whole UTF-8 form fits in @out; a trailing odd
 * byte or a high surrogate at the very end of @inb is left unconsumed.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned char* outend;
    unsigned int c, d;
    int bits, needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /* The length is even here, so two bytes are available when in < inend. */
    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split across chunks: wait for more input */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) needed = 1;
        else if (c < 0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else needed = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = c;
        } else {
            if (c < 0x800) { *out++ = ((c >> 6) & 0x1F) | 0xC0; bits = 0; }
            else if (c < 0x10000) { *out++ = ((c >> 12) & 0x0F) | 0xE0; bits = 6; }
            else { *out++ = ((c >> 18) & 0x07) | 0xF0; bits = 12; }

            for ( ; bits >= 0; bits -= 6)
                *out++ = ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
796
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000797#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     of 0x110000 and above). A NULL @in returns 0.
 * The value of *@inlen after return is the number of octets consumed,
 * the value of *@outlen the number of octets produced (always counted
 * in bytes, including on the -2 paths). A sequence truncated at the end
 * of @in, or a character that does not fit in @outb, is left unconsumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are usable */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }

        /* incomplete sequence: leave it for the next call */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* not a continuation byte: do not emit a partial value */
                *outlen = out - outb;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        }
        else {
            /* beyond the range UTF-16 can represent */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000903#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000904
Daniel Veillard97ac1312001-05-30 19:14:17 +0000905/************************************************************************
906 * *
907 * Generic encoding handling routines *
908 * *
909 ************************************************************************/
910
Owen Taylor3473f882001-02-23 17:55:21 +0000911/**
912 * xmlDetectCharEncoding:
913 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000914 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000915 * @len: pointer to the length of the buffer
916 *
917 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000918 * according to the non-normative appendix F of the XML-1.0 recommendation.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100919 *
Owen Taylor3473f882001-02-23 17:55:21 +0000920 * Returns one of the XML_CHAR_ENCODING_... values.
921 */
922xmlCharEncoding
923xmlDetectCharEncoding(const unsigned char* in, int len)
924{
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100925 if (in == NULL)
Daniel Veillardce682bc2004-11-05 17:22:25 +0000926 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000927 if (len >= 4) {
928 if ((in[0] == 0x00) && (in[1] == 0x00) &&
929 (in[2] == 0x00) && (in[3] == 0x3C))
930 return(XML_CHAR_ENCODING_UCS4BE);
931 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
932 (in[2] == 0x00) && (in[3] == 0x00))
933 return(XML_CHAR_ENCODING_UCS4LE);
934 if ((in[0] == 0x00) && (in[1] == 0x00) &&
935 (in[2] == 0x3C) && (in[3] == 0x00))
936 return(XML_CHAR_ENCODING_UCS4_2143);
937 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
938 (in[2] == 0x00) && (in[3] == 0x00))
939 return(XML_CHAR_ENCODING_UCS4_3412);
940 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
941 (in[2] == 0xA7) && (in[3] == 0x94))
942 return(XML_CHAR_ENCODING_EBCDIC);
943 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
944 (in[2] == 0x78) && (in[3] == 0x6D))
945 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000946 /*
947 * Although not part of the recommendation, we also
948 * attempt an "auto-recognition" of UTF-16LE and
949 * UTF-16BE encodings.
950 */
951 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 (in[2] == 0x3F) && (in[3] == 0x00))
953 return(XML_CHAR_ENCODING_UTF16LE);
954 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
955 (in[2] == 0x00) && (in[3] == 0x3F))
956 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000957 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000958 if (len >= 3) {
959 /*
960 * Errata on XML-1.0 June 20 2001
961 * We now allow an UTF8 encoded BOM
962 */
963 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
964 (in[2] == 0xBF))
965 return(XML_CHAR_ENCODING_UTF8);
966 }
William M. Brackf9415e42003-11-28 09:39:10 +0000967 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000968 if (len >= 2) {
969 if ((in[0] == 0xFE) && (in[1] == 0xFF))
970 return(XML_CHAR_ENCODING_UTF16BE);
971 if ((in[0] == 0xFF) && (in[1] == 0xFE))
972 return(XML_CHAR_ENCODING_UTF16LE);
973 }
974 return(XML_CHAR_ENCODING_NONE);
975}
976
977/**
978 * xmlCleanupEncodingAliases:
979 *
980 * Unregisters all aliases
981 */
982void
983xmlCleanupEncodingAliases(void) {
984 int i;
985
986 if (xmlCharEncodingAliases == NULL)
987 return;
988
989 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
990 if (xmlCharEncodingAliases[i].name != NULL)
991 xmlFree((char *) xmlCharEncodingAliases[i].name);
992 if (xmlCharEncodingAliases[i].alias != NULL)
993 xmlFree((char *) xmlCharEncodingAliases[i].alias);
994 }
995 xmlCharEncodingAliasesNb = 0;
996 xmlCharEncodingAliasesMax = 0;
997 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +0000998 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000999}
1000
1001/**
1002 * xmlGetEncodingAlias:
1003 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1004 *
1005 * Lookup an encoding name for the given alias.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 *
William M. Brackf9415e42003-11-28 09:39:10 +00001007 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +00001008 */
1009const char *
1010xmlGetEncodingAlias(const char *alias) {
1011 int i;
1012 char upper[100];
1013
1014 if (alias == NULL)
1015 return(NULL);
1016
1017 if (xmlCharEncodingAliases == NULL)
1018 return(NULL);
1019
1020 for (i = 0;i < 99;i++) {
1021 upper[i] = toupper(alias[i]);
1022 if (upper[i] == 0) break;
1023 }
1024 upper[i] = 0;
1025
1026 /*
1027 * Walk down the list looking for a definition of the alias
1028 */
1029 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1030 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1031 return(xmlCharEncodingAliases[i].name);
1032 }
1033 }
1034 return(NULL);
1035}
1036
1037/**
1038 * xmlAddEncodingAlias:
1039 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1040 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1041 *
William M. Brackf9415e42003-11-28 09:39:10 +00001042 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +00001043 * will be overwritten.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001044 *
Owen Taylor3473f882001-02-23 17:55:21 +00001045 * Returns 0 in case of success, -1 in case of error
1046 */
1047int
1048xmlAddEncodingAlias(const char *name, const char *alias) {
1049 int i;
1050 char upper[100];
1051
1052 if ((name == NULL) || (alias == NULL))
1053 return(-1);
1054
1055 for (i = 0;i < 99;i++) {
1056 upper[i] = toupper(alias[i]);
1057 if (upper[i] == 0) break;
1058 }
1059 upper[i] = 0;
1060
1061 if (xmlCharEncodingAliases == NULL) {
1062 xmlCharEncodingAliasesNb = 0;
1063 xmlCharEncodingAliasesMax = 20;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001064 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001065 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1066 if (xmlCharEncodingAliases == NULL)
1067 return(-1);
1068 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1069 xmlCharEncodingAliasesMax *= 2;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001070 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001071 xmlRealloc(xmlCharEncodingAliases,
1072 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1073 }
1074 /*
1075 * Walk down the list looking for a definition of the alias
1076 */
1077 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1078 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1079 /*
1080 * Replace the definition.
1081 */
1082 xmlFree((char *) xmlCharEncodingAliases[i].name);
1083 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1084 return(0);
1085 }
1086 }
1087 /*
1088 * Add the definition
1089 */
1090 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1091 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1092 xmlCharEncodingAliasesNb++;
1093 return(0);
1094}
1095
1096/**
1097 * xmlDelEncodingAlias:
1098 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1099 *
1100 * Unregisters an encoding alias @alias
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001101 *
Owen Taylor3473f882001-02-23 17:55:21 +00001102 * Returns 0 in case of success, -1 in case of error
1103 */
1104int
1105xmlDelEncodingAlias(const char *alias) {
1106 int i;
1107
1108 if (alias == NULL)
1109 return(-1);
1110
1111 if (xmlCharEncodingAliases == NULL)
1112 return(-1);
1113 /*
1114 * Walk down the list looking for a definition of the alias
1115 */
1116 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1117 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1118 xmlFree((char *) xmlCharEncodingAliases[i].name);
1119 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1120 xmlCharEncodingAliasesNb--;
1121 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1122 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1123 return(0);
1124 }
1125 }
1126 return(-1);
1127}
1128
1129/**
1130 * xmlParseCharEncoding:
1131 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1132 *
William M. Brackf9415e42003-11-28 09:39:10 +00001133 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001134 * that the comparison is case insensitive accordingly to the section
1135 * [XML] 4.3.3 Character Encoding in Entities.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001136 *
Owen Taylor3473f882001-02-23 17:55:21 +00001137 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1138 * if not recognized.
1139 */
1140xmlCharEncoding
1141xmlParseCharEncoding(const char* name)
1142{
1143 const char *alias;
1144 char upper[500];
1145 int i;
1146
1147 if (name == NULL)
1148 return(XML_CHAR_ENCODING_NONE);
1149
1150 /*
1151 * Do the alias resolution
1152 */
1153 alias = xmlGetEncodingAlias(name);
1154 if (alias != NULL)
1155 name = alias;
1156
1157 for (i = 0;i < 499;i++) {
1158 upper[i] = toupper(name[i]);
1159 if (upper[i] == 0) break;
1160 }
1161 upper[i] = 0;
1162
1163 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1164 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1165 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1166
1167 /*
1168 * NOTE: if we were able to parse this, the endianness of UTF16 is
1169 * already found and in use
1170 */
1171 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1172 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001173
Owen Taylor3473f882001-02-23 17:55:21 +00001174 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1175 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1176 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1177
1178 /*
1179 * NOTE: if we were able to parse this, the endianness of UCS4 is
1180 * already found and in use
1181 */
1182 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1183 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1184 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1185
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001186
Owen Taylor3473f882001-02-23 17:55:21 +00001187 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1188 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1189 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1190
1191 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1192 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1193 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1194
1195 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1196 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1197 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1198 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1199 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1200 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1201 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1202
1203 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1204 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1205 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1206
1207#ifdef DEBUG_ENCODING
1208 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1209#endif
1210 return(XML_CHAR_ENCODING_ERROR);
1211}
1212
1213/**
1214 * xmlGetCharEncodingName:
1215 * @enc: the encoding
1216 *
1217 * The "canonical" name for XML encoding.
1218 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1219 * Section 4.3.3 Character Encoding in Entities
1220 *
1221 * Returns the canonical name for the given encoding
1222 */
1223
1224const char*
1225xmlGetCharEncodingName(xmlCharEncoding enc) {
1226 switch (enc) {
1227 case XML_CHAR_ENCODING_ERROR:
1228 return(NULL);
1229 case XML_CHAR_ENCODING_NONE:
1230 return(NULL);
1231 case XML_CHAR_ENCODING_UTF8:
1232 return("UTF-8");
1233 case XML_CHAR_ENCODING_UTF16LE:
1234 return("UTF-16");
1235 case XML_CHAR_ENCODING_UTF16BE:
1236 return("UTF-16");
1237 case XML_CHAR_ENCODING_EBCDIC:
1238 return("EBCDIC");
1239 case XML_CHAR_ENCODING_UCS4LE:
1240 return("ISO-10646-UCS-4");
1241 case XML_CHAR_ENCODING_UCS4BE:
1242 return("ISO-10646-UCS-4");
1243 case XML_CHAR_ENCODING_UCS4_2143:
1244 return("ISO-10646-UCS-4");
1245 case XML_CHAR_ENCODING_UCS4_3412:
1246 return("ISO-10646-UCS-4");
1247 case XML_CHAR_ENCODING_UCS2:
1248 return("ISO-10646-UCS-2");
1249 case XML_CHAR_ENCODING_8859_1:
1250 return("ISO-8859-1");
1251 case XML_CHAR_ENCODING_8859_2:
1252 return("ISO-8859-2");
1253 case XML_CHAR_ENCODING_8859_3:
1254 return("ISO-8859-3");
1255 case XML_CHAR_ENCODING_8859_4:
1256 return("ISO-8859-4");
1257 case XML_CHAR_ENCODING_8859_5:
1258 return("ISO-8859-5");
1259 case XML_CHAR_ENCODING_8859_6:
1260 return("ISO-8859-6");
1261 case XML_CHAR_ENCODING_8859_7:
1262 return("ISO-8859-7");
1263 case XML_CHAR_ENCODING_8859_8:
1264 return("ISO-8859-8");
1265 case XML_CHAR_ENCODING_8859_9:
1266 return("ISO-8859-9");
1267 case XML_CHAR_ENCODING_2022_JP:
1268 return("ISO-2022-JP");
1269 case XML_CHAR_ENCODING_SHIFT_JIS:
1270 return("Shift-JIS");
1271 case XML_CHAR_ENCODING_EUC_JP:
1272 return("EUC-JP");
1273 case XML_CHAR_ENCODING_ASCII:
1274 return(NULL);
1275 }
1276 return(NULL);
1277}
1278
Daniel Veillard97ac1312001-05-30 19:14:17 +00001279/************************************************************************
1280 * *
1281 * Char encoding handlers *
1282 * *
1283 ************************************************************************/
1284
Owen Taylor3473f882001-02-23 17:55:21 +00001285
1286/* the size should be growable, but it's not a big deal ... */
1287#define MAX_ENCODING_HANDLERS 50
1288static xmlCharEncodingHandlerPtr *handlers = NULL;
1289static int nbCharEncodingHandler = 0;
1290
1291/*
1292 * The default is UTF-8 for XML, that's also the default used for the
1293 * parser internals, so the default encoding handler is NULL
1294 */
1295
1296static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1297
1298/**
1299 * xmlNewCharEncodingHandler:
1300 * @name: the encoding name, in UTF-8 format (ASCII actually)
1301 * @input: the xmlCharEncodingInputFunc to read that encoding
1302 * @output: the xmlCharEncodingOutputFunc to write that encoding
1303 *
1304 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001305 *
Owen Taylor3473f882001-02-23 17:55:21 +00001306 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1307 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001308xmlCharEncodingHandlerPtr
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001309xmlNewCharEncodingHandler(const char *name,
Owen Taylor3473f882001-02-23 17:55:21 +00001310 xmlCharEncodingInputFunc input,
1311 xmlCharEncodingOutputFunc output) {
1312 xmlCharEncodingHandlerPtr handler;
1313 const char *alias;
1314 char upper[500];
1315 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001316 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001317
1318 /*
1319 * Do the alias resolution
1320 */
1321 alias = xmlGetEncodingAlias(name);
1322 if (alias != NULL)
1323 name = alias;
1324
1325 /*
1326 * Keep only the uppercase version of the encoding.
1327 */
1328 if (name == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001329 xmlEncodingErr(XML_I18N_NO_NAME,
1330 "xmlNewCharEncodingHandler : no name !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001331 return(NULL);
1332 }
1333 for (i = 0;i < 499;i++) {
1334 upper[i] = toupper(name[i]);
1335 if (upper[i] == 0) break;
1336 }
1337 upper[i] = 0;
1338 up = xmlMemStrdup(upper);
1339 if (up == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001340 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001341 return(NULL);
1342 }
1343
1344 /*
1345 * allocate and fill-up an handler block.
1346 */
1347 handler = (xmlCharEncodingHandlerPtr)
1348 xmlMalloc(sizeof(xmlCharEncodingHandler));
1349 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001350 xmlFree(up);
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001351 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001352 return(NULL);
1353 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001354 memset(handler, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001355 handler->input = input;
1356 handler->output = output;
1357 handler->name = up;
1358
1359#ifdef LIBXML_ICONV_ENABLED
1360 handler->iconv_in = NULL;
1361 handler->iconv_out = NULL;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001362#endif
1363#ifdef LIBXML_ICU_ENABLED
1364 handler->uconv_in = NULL;
1365 handler->uconv_out = NULL;
1366#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001367
1368 /*
1369 * registers and returns the handler.
1370 */
1371 xmlRegisterCharEncodingHandler(handler);
1372#ifdef DEBUG_ENCODING
1373 xmlGenericError(xmlGenericErrorContext,
1374 "Registered encoding handler for %s\n", name);
1375#endif
1376 return(handler);
1377}
1378
1379/**
1380 * xmlInitCharEncodingHandlers:
1381 *
1382 * Initialize the char encoding support, it registers the default
1383 * encoding supported.
1384 * NOTE: while public, this function usually doesn't need to be called
1385 * in normal processing.
1386 */
1387void
1388xmlInitCharEncodingHandlers(void) {
1389 unsigned short int tst = 0x1234;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001390 unsigned char *ptr = (unsigned char *) &tst;
Owen Taylor3473f882001-02-23 17:55:21 +00001391
1392 if (handlers != NULL) return;
1393
1394 handlers = (xmlCharEncodingHandlerPtr *)
1395 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1396
1397 if (*ptr == 0x12) xmlLittleEndian = 0;
1398 else if (*ptr == 0x34) xmlLittleEndian = 1;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001399 else {
1400 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1401 "Odd problem at endianness detection\n", NULL);
1402 }
Owen Taylor3473f882001-02-23 17:55:21 +00001403
1404 if (handlers == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001405 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001406 return;
1407 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001408 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001409#ifdef LIBXML_OUTPUT_ENABLED
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001410 xmlUTF16LEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001411 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001412 xmlUTF16BEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001413 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001414 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001415 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1416 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001417 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001418#ifdef LIBXML_HTML_ENABLED
1419 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1420#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001421#else
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001422 xmlUTF16LEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001423 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001424 xmlUTF16BEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001425 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001426 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001427 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1428 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1429 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1430#endif /* LIBXML_OUTPUT_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001431#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001432#ifdef LIBXML_ISO8859X_ENABLED
1433 xmlRegisterCharEncodingHandlersISO8859x ();
1434#endif
1435#endif
1436
Owen Taylor3473f882001-02-23 17:55:21 +00001437}
1438
1439/**
1440 * xmlCleanupCharEncodingHandlers:
1441 *
1442 * Cleanup the memory allocated for the char encoding support, it
1443 * unregisters all the encoding handlers and the aliases.
1444 */
1445void
1446xmlCleanupCharEncodingHandlers(void) {
1447 xmlCleanupEncodingAliases();
1448
1449 if (handlers == NULL) return;
1450
1451 for (;nbCharEncodingHandler > 0;) {
1452 nbCharEncodingHandler--;
1453 if (handlers[nbCharEncodingHandler] != NULL) {
1454 if (handlers[nbCharEncodingHandler]->name != NULL)
1455 xmlFree(handlers[nbCharEncodingHandler]->name);
1456 xmlFree(handlers[nbCharEncodingHandler]);
1457 }
1458 }
1459 xmlFree(handlers);
1460 handlers = NULL;
1461 nbCharEncodingHandler = 0;
1462 xmlDefaultCharEncodingHandler = NULL;
1463}
1464
1465/**
1466 * xmlRegisterCharEncodingHandler:
1467 * @handler: the xmlCharEncodingHandlerPtr handler block
1468 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001469 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001470 */
1471void
1472xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1473 if (handlers == NULL) xmlInitCharEncodingHandlers();
Daniel Veillard76d36452009-09-07 11:19:33 +02001474 if ((handler == NULL) || (handlers == NULL)) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001475 xmlEncodingErr(XML_I18N_NO_HANDLER,
1476 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001477 return;
1478 }
1479
1480 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001481 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1482 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1483 "MAX_ENCODING_HANDLERS");
Owen Taylor3473f882001-02-23 17:55:21 +00001484 return;
1485 }
1486 handlers[nbCharEncodingHandler++] = handler;
1487}
1488
1489/**
1490 * xmlGetCharEncodingHandler:
1491 * @enc: an xmlCharEncoding value.
1492 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001493 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001494 *
1495 * Returns the handler or NULL if not found
1496 */
1497xmlCharEncodingHandlerPtr
1498xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1499 xmlCharEncodingHandlerPtr handler;
1500
1501 if (handlers == NULL) xmlInitCharEncodingHandlers();
1502 switch (enc) {
1503 case XML_CHAR_ENCODING_ERROR:
1504 return(NULL);
1505 case XML_CHAR_ENCODING_NONE:
1506 return(NULL);
1507 case XML_CHAR_ENCODING_UTF8:
1508 return(NULL);
1509 case XML_CHAR_ENCODING_UTF16LE:
1510 return(xmlUTF16LEHandler);
1511 case XML_CHAR_ENCODING_UTF16BE:
1512 return(xmlUTF16BEHandler);
1513 case XML_CHAR_ENCODING_EBCDIC:
1514 handler = xmlFindCharEncodingHandler("EBCDIC");
1515 if (handler != NULL) return(handler);
1516 handler = xmlFindCharEncodingHandler("ebcdic");
1517 if (handler != NULL) return(handler);
Martin Köglerc78988a2009-08-24 16:47:48 +02001518 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1519 if (handler != NULL) return(handler);
Petr Sumbera6f49c732012-12-12 15:41:30 +08001520 handler = xmlFindCharEncodingHandler("IBM-037");
1521 if (handler != NULL) return(handler);
Owen Taylor3473f882001-02-23 17:55:21 +00001522 break;
1523 case XML_CHAR_ENCODING_UCS4BE:
1524 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1525 if (handler != NULL) return(handler);
1526 handler = xmlFindCharEncodingHandler("UCS-4");
1527 if (handler != NULL) return(handler);
1528 handler = xmlFindCharEncodingHandler("UCS4");
1529 if (handler != NULL) return(handler);
1530 break;
1531 case XML_CHAR_ENCODING_UCS4LE:
1532 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1533 if (handler != NULL) return(handler);
1534 handler = xmlFindCharEncodingHandler("UCS-4");
1535 if (handler != NULL) return(handler);
1536 handler = xmlFindCharEncodingHandler("UCS4");
1537 if (handler != NULL) return(handler);
1538 break;
1539 case XML_CHAR_ENCODING_UCS4_2143:
1540 break;
1541 case XML_CHAR_ENCODING_UCS4_3412:
1542 break;
1543 case XML_CHAR_ENCODING_UCS2:
1544 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1545 if (handler != NULL) return(handler);
1546 handler = xmlFindCharEncodingHandler("UCS-2");
1547 if (handler != NULL) return(handler);
1548 handler = xmlFindCharEncodingHandler("UCS2");
1549 if (handler != NULL) return(handler);
1550 break;
1551
1552 /*
1553 * We used to keep ISO Latin encodings native in the
1554 * generated data. This led to so many problems that
1555 * this has been removed. One can still change this
1556 * back by registering no-ops encoders for those
1557 */
1558 case XML_CHAR_ENCODING_8859_1:
1559 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1560 if (handler != NULL) return(handler);
1561 break;
1562 case XML_CHAR_ENCODING_8859_2:
1563 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1564 if (handler != NULL) return(handler);
1565 break;
1566 case XML_CHAR_ENCODING_8859_3:
1567 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1568 if (handler != NULL) return(handler);
1569 break;
1570 case XML_CHAR_ENCODING_8859_4:
1571 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1572 if (handler != NULL) return(handler);
1573 break;
1574 case XML_CHAR_ENCODING_8859_5:
1575 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1576 if (handler != NULL) return(handler);
1577 break;
1578 case XML_CHAR_ENCODING_8859_6:
1579 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1580 if (handler != NULL) return(handler);
1581 break;
1582 case XML_CHAR_ENCODING_8859_7:
1583 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1584 if (handler != NULL) return(handler);
1585 break;
1586 case XML_CHAR_ENCODING_8859_8:
1587 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1588 if (handler != NULL) return(handler);
1589 break;
1590 case XML_CHAR_ENCODING_8859_9:
1591 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1592 if (handler != NULL) return(handler);
1593 break;
1594
1595
1596 case XML_CHAR_ENCODING_2022_JP:
1597 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1598 if (handler != NULL) return(handler);
1599 break;
1600 case XML_CHAR_ENCODING_SHIFT_JIS:
1601 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1602 if (handler != NULL) return(handler);
1603 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1604 if (handler != NULL) return(handler);
1605 handler = xmlFindCharEncodingHandler("Shift_JIS");
1606 if (handler != NULL) return(handler);
1607 break;
1608 case XML_CHAR_ENCODING_EUC_JP:
1609 handler = xmlFindCharEncodingHandler("EUC-JP");
1610 if (handler != NULL) return(handler);
1611 break;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001612 default:
Owen Taylor3473f882001-02-23 17:55:21 +00001613 break;
1614 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001615
Owen Taylor3473f882001-02-23 17:55:21 +00001616#ifdef DEBUG_ENCODING
1617 xmlGenericError(xmlGenericErrorContext,
1618 "No handler found for encoding %d\n", enc);
1619#endif
1620 return(NULL);
1621}
1622
1623/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001624 * xmlFindCharEncodingHandler:
1625 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001626 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001627 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001628 *
1629 * Returns the handler or NULL if not found
1630 */
1631xmlCharEncodingHandlerPtr
1632xmlFindCharEncodingHandler(const char *name) {
1633 const char *nalias;
1634 const char *norig;
1635 xmlCharEncoding alias;
1636#ifdef LIBXML_ICONV_ENABLED
1637 xmlCharEncodingHandlerPtr enc;
1638 iconv_t icv_in, icv_out;
1639#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001640#ifdef LIBXML_ICU_ENABLED
1641 xmlCharEncodingHandlerPtr encu;
1642 uconv_t *ucv_in, *ucv_out;
1643#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001644 char upper[100];
1645 int i;
1646
1647 if (handlers == NULL) xmlInitCharEncodingHandlers();
1648 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1649 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1650
1651 /*
1652 * Do the alias resolution
1653 */
1654 norig = name;
1655 nalias = xmlGetEncodingAlias(name);
1656 if (nalias != NULL)
1657 name = nalias;
1658
1659 /*
1660 * Check first for directly registered encoding names
1661 */
1662 for (i = 0;i < 99;i++) {
1663 upper[i] = toupper(name[i]);
1664 if (upper[i] == 0) break;
1665 }
1666 upper[i] = 0;
1667
Daniel Veillardd44b9362009-09-07 12:15:08 +02001668 if (handlers != NULL) {
1669 for (i = 0;i < nbCharEncodingHandler; i++) {
1670 if (!strcmp(upper, handlers[i]->name)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001671#ifdef DEBUG_ENCODING
Daniel Veillardd44b9362009-09-07 12:15:08 +02001672 xmlGenericError(xmlGenericErrorContext,
1673 "Found registered handler for encoding %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001674#endif
Daniel Veillardd44b9362009-09-07 12:15:08 +02001675 return(handlers[i]);
1676 }
1677 }
1678 }
Owen Taylor3473f882001-02-23 17:55:21 +00001679
1680#ifdef LIBXML_ICONV_ENABLED
1681 /* check whether iconv can handle this */
1682 icv_in = iconv_open("UTF-8", name);
1683 icv_out = iconv_open(name, "UTF-8");
Daniel Veillard28aac0b2006-10-16 08:31:18 +00001684 if (icv_in == (iconv_t) -1) {
1685 icv_in = iconv_open("UTF-8", upper);
1686 }
1687 if (icv_out == (iconv_t) -1) {
1688 icv_out = iconv_open(upper, "UTF-8");
1689 }
Owen Taylor3473f882001-02-23 17:55:21 +00001690 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1691 enc = (xmlCharEncodingHandlerPtr)
1692 xmlMalloc(sizeof(xmlCharEncodingHandler));
1693 if (enc == NULL) {
1694 iconv_close(icv_in);
1695 iconv_close(icv_out);
1696 return(NULL);
1697 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001698 memset(enc, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001699 enc->name = xmlMemStrdup(name);
1700 enc->input = NULL;
1701 enc->output = NULL;
1702 enc->iconv_in = icv_in;
1703 enc->iconv_out = icv_out;
1704#ifdef DEBUG_ENCODING
1705 xmlGenericError(xmlGenericErrorContext,
1706 "Found iconv handler for encoding %s\n", name);
1707#endif
1708 return enc;
1709 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001710 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00001711 "iconv : problems with filters for '%s'\n", name);
1712 }
1713#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001714#ifdef LIBXML_ICU_ENABLED
1715 /* check whether icu can handle this */
1716 ucv_in = openIcuConverter(name, 1);
1717 ucv_out = openIcuConverter(name, 0);
1718 if (ucv_in != NULL && ucv_out != NULL) {
1719 encu = (xmlCharEncodingHandlerPtr)
1720 xmlMalloc(sizeof(xmlCharEncodingHandler));
1721 if (encu == NULL) {
1722 closeIcuConverter(ucv_in);
1723 closeIcuConverter(ucv_out);
1724 return(NULL);
1725 }
1726 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1727 encu->name = xmlMemStrdup(name);
1728 encu->input = NULL;
1729 encu->output = NULL;
1730 encu->uconv_in = ucv_in;
1731 encu->uconv_out = ucv_out;
1732#ifdef DEBUG_ENCODING
1733 xmlGenericError(xmlGenericErrorContext,
1734 "Found ICU converter handler for encoding %s\n", name);
1735#endif
1736 return encu;
1737 } else if (ucv_in != NULL || ucv_out != NULL) {
1738 closeIcuConverter(ucv_in);
1739 closeIcuConverter(ucv_out);
1740 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1741 "ICU converter : problems with filters for '%s'\n", name);
1742 }
1743#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001744
1745#ifdef DEBUG_ENCODING
1746 xmlGenericError(xmlGenericErrorContext,
1747 "No handler found for encoding %s\n", name);
1748#endif
1749
1750 /*
1751 * Fallback using the canonical names
1752 */
1753 alias = xmlParseCharEncoding(norig);
1754 if (alias != XML_CHAR_ENCODING_ERROR) {
1755 const char* canon;
1756 canon = xmlGetCharEncodingName(alias);
1757 if ((canon != NULL) && (strcmp(name, canon))) {
1758 return(xmlFindCharEncodingHandler(canon));
1759 }
1760 }
1761
William M. Brackf9415e42003-11-28 09:39:10 +00001762 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001763 return(NULL);
1764}
1765
Daniel Veillard97ac1312001-05-30 19:14:17 +00001766/************************************************************************
1767 * *
1768 * ICONV based generic conversion functions *
1769 * *
1770 ************************************************************************/
1771
Owen Taylor3473f882001-02-23 17:55:21 +00001772#ifdef LIBXML_ICONV_ENABLED
1773/**
1774 * xmlIconvWrapper:
1775 * @cd: iconv converter data structure
1776 * @out: a pointer to an array of bytes to store the result
1777 * @outlen: the length of @out
1778 * @in: a pointer to an array of ISO Latin 1 chars
1779 * @inlen: the length of @in
1780 *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001781 * Returns 0 if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001782 * -1 by lack of space, or
1783 * -2 if the transcoding fails (for *in is not valid utf8 string or
1784 * the result of transformation can't fit into the encoding we want), or
1785 * -3 if there the last byte can't form a single output char.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001786 *
Owen Taylor3473f882001-02-23 17:55:21 +00001787 * The value of @inlen after return is the number of octets consumed
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001788 * as the return value is positive, else unpredictable.
Owen Taylor3473f882001-02-23 17:55:21 +00001789 * The value of @outlen after return is the number of ocetes consumed.
1790 */
1791static int
Daniel Veillardce682bc2004-11-05 17:22:25 +00001792xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1793 const unsigned char *in, int *inlen) {
1794 size_t icv_inlen, icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001795 const char *icv_in = (const char *) in;
1796 char *icv_out = (char *) out;
1797 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001798
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001799 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1800 if (outlen != NULL) *outlen = 0;
Daniel Veillardce682bc2004-11-05 17:22:25 +00001801 return(-1);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001802 }
Daniel Veillardce682bc2004-11-05 17:22:25 +00001803 icv_inlen = *inlen;
1804 icv_outlen = *outlen;
Daniel Veillard8e1a46d2008-02-15 07:47:26 +00001805 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
Daniel Veillard2728f842006-03-09 16:49:24 +00001806 *inlen -= icv_inlen;
1807 *outlen -= icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001808 if ((icv_inlen != 0) || (ret == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001809#ifdef EILSEQ
Daniel Veillard9403a042001-05-28 11:00:53 +00001810 if (errno == EILSEQ) {
1811 return -2;
1812 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001813#endif
1814#ifdef E2BIG
Daniel Veillard9403a042001-05-28 11:00:53 +00001815 if (errno == E2BIG) {
1816 return -1;
1817 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001818#endif
1819#ifdef EINVAL
Daniel Veillard9403a042001-05-28 11:00:53 +00001820 if (errno == EINVAL) {
1821 return -3;
1822 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001823#endif
Daniel Veillard9403a042001-05-28 11:00:53 +00001824 {
1825 return -3;
1826 }
1827 }
1828 return 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001829}
1830#endif /* LIBXML_ICONV_ENABLED */
1831
Daniel Veillard97ac1312001-05-30 19:14:17 +00001832/************************************************************************
1833 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001834 * ICU based generic conversion functions *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001835 * *
1836 ************************************************************************/
1837
1838#ifdef LIBXML_ICU_ENABLED
1839/**
1840 * xmlUconvWrapper:
1841 * @cd: ICU uconverter data structure
1842 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1843 * @out: a pointer to an array of bytes to store the result
1844 * @outlen: the length of @out
1845 * @in: a pointer to an array of ISO Latin 1 chars
1846 * @inlen: the length of @in
1847 *
1848 * Returns 0 if success, or
1849 * -1 by lack of space, or
1850 * -2 if the transcoding fails (for *in is not valid utf8 string or
1851 * the result of transformation can't fit into the encoding we want), or
1852 * -3 if there the last byte can't form a single output char.
1853 *
1854 * The value of @inlen after return is the number of octets consumed
1855 * as the return value is positive, else unpredictable.
1856 * The value of @outlen after return is the number of ocetes consumed.
1857 */
1858static int
1859xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1860 const unsigned char *in, int *inlen) {
1861 const char *ucv_in = (const char *) in;
1862 char *ucv_out = (char *) out;
1863 UErrorCode err = U_ZERO_ERROR;
1864
1865 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1866 if (outlen != NULL) *outlen = 0;
1867 return(-1);
1868 }
1869
1870 /*
1871 * TODO(jungshik)
1872 * 1. is ucnv_convert(To|From)Algorithmic better?
1873 * 2. had we better use an explicit pivot buffer?
1874 * 3. error returned comes from 'fromUnicode' only even
1875 * when toUnicode is true !
1876 */
1877 if (toUnicode) {
1878 /* encoding => UTF-16 => UTF-8 */
1879 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1880 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1881 0, TRUE, &err);
1882 } else {
1883 /* UTF-8 => UTF-16 => encoding */
1884 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1885 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1886 0, TRUE, &err);
1887 }
1888 *inlen = ucv_in - (const char*) in;
1889 *outlen = ucv_out - (char *) out;
1890 if (U_SUCCESS(err))
1891 return 0;
1892 if (err == U_BUFFER_OVERFLOW_ERROR)
1893 return -1;
1894 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1895 return -2;
1896 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1897 return -3;
1898}
1899#endif /* LIBXML_ICU_ENABLED */
1900
1901/************************************************************************
1902 * *
Daniel Veillard97ac1312001-05-30 19:14:17 +00001903 * The real API used by libxml for on-the-fly conversion *
1904 * *
1905 ************************************************************************/
1906
Owen Taylor3473f882001-02-23 17:55:21 +00001907/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001908 * xmlCharEncFirstLineInt:
Owen Taylor3473f882001-02-23 17:55:21 +00001909 * @handler: char enconding transformation data structure
1910 * @out: an xmlBuffer for the output.
1911 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001912 * @len: number of bytes to convert for the first line, or -1
1913 *
Owen Taylor3473f882001-02-23 17:55:21 +00001914 * Front-end for the encoding handler input function, but handle only
1915 * the very first line, i.e. limit itself to 45 chars.
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001916 *
1917 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001918 * -1 general error
1919 * -2 if the transcoding fails (for *in is not valid utf8 string or
1920 * the result of transformation can't fit into the encoding we want), or
1921 */
1922int
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001923xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1924 xmlBufferPtr in, int len) {
Owen Taylor3473f882001-02-23 17:55:21 +00001925 int ret = -2;
1926 int written;
1927 int toconv;
1928
1929 if (handler == NULL) return(-1);
1930 if (out == NULL) return(-1);
1931 if (in == NULL) return(-1);
1932
William M. Brack38d452a2007-05-22 16:00:06 +00001933 /* calculate space available */
Daniel Veillard69f04562011-08-19 11:05:04 +08001934 written = out->size - out->use - 1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00001935 toconv = in->use;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 /*
1937 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1938 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001939 * declaration without going too far inside the document content.
Daniel Veillard57c9db02008-03-06 14:37:10 +00001940 * on UTF-16 this means 90bytes, on UCS4 this means 180
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001941 * The actual value depending on guessed encoding is passed as @len
1942 * if provided
Owen Taylor3473f882001-02-23 17:55:21 +00001943 */
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001944 if (len >= 0) {
1945 if (toconv > len)
1946 toconv = len;
1947 } else {
1948 if (toconv > 180)
1949 toconv = 180;
1950 }
William M. Brack38d452a2007-05-22 16:00:06 +00001951 if (toconv * 2 >= written) {
Daniel Veillard18d0db22012-07-13 19:51:15 +08001952 xmlBufferGrow(out, toconv * 2);
William M. Brack38d452a2007-05-22 16:00:06 +00001953 written = out->size - out->use - 1;
1954 }
Owen Taylor3473f882001-02-23 17:55:21 +00001955
1956 if (handler->input != NULL) {
1957 ret = handler->input(&out->content[out->use], &written,
1958 in->content, &toconv);
1959 xmlBufferShrink(in, toconv);
1960 out->use += written;
1961 out->content[out->use] = 0;
1962 }
1963#ifdef LIBXML_ICONV_ENABLED
1964 else if (handler->iconv_in != NULL) {
1965 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1966 &written, in->content, &toconv);
1967 xmlBufferShrink(in, toconv);
1968 out->use += written;
1969 out->content[out->use] = 0;
1970 if (ret == -1) ret = -3;
1971 }
1972#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001973#ifdef LIBXML_ICU_ENABLED
1974 else if (handler->uconv_in != NULL) {
1975 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1976 &written, in->content, &toconv);
1977 xmlBufferShrink(in, toconv);
1978 out->use += written;
1979 out->content[out->use] = 0;
1980 if (ret == -1) ret = -3;
1981 }
1982#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001983#ifdef DEBUG_ENCODING
1984 switch (ret) {
1985 case 0:
1986 xmlGenericError(xmlGenericErrorContext,
1987 "converted %d bytes to %d bytes of input\n",
1988 toconv, written);
1989 break;
1990 case -1:
1991 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1992 toconv, written, in->use);
1993 break;
1994 case -2:
1995 xmlGenericError(xmlGenericErrorContext,
1996 "input conversion failed due to input error\n");
1997 break;
1998 case -3:
1999 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2000 toconv, written, in->use);
2001 break;
2002 default:
2003 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2004 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002005#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002006 /*
2007 * Ignore when input buffer is not on a boundary
2008 */
2009 if (ret == -3) ret = 0;
2010 if (ret == -1) ret = 0;
2011 return(ret);
2012}
2013
2014/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002015 * xmlCharEncFirstLine:
2016 * @handler: char enconding transformation data structure
2017 * @out: an xmlBuffer for the output.
2018 * @in: an xmlBuffer for the input
2019 *
2020 * Front-end for the encoding handler input function, but handle only
2021 * the very first line, i.e. limit itself to 45 chars.
2022 *
2023 * Returns the number of byte written if success, or
2024 * -1 general error
2025 * -2 if the transcoding fails (for *in is not valid utf8 string or
2026 * the result of transformation can't fit into the encoding we want), or
2027 */
2028int
2029xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2030 xmlBufferPtr in) {
2031 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2032}
2033
2034/**
Daniel Veillard28cc42d2012-08-10 10:00:18 +08002035 * xmlCharEncFirstLineInput:
Daniel Veillard18d0db22012-07-13 19:51:15 +08002036 * @input: a parser input buffer
2037 * @len: number of bytes to convert for the first line, or -1
2038 *
2039 * Front-end for the encoding handler input function, but handle only
2040 * the very first line. Point is that this is based on autodetection
2041 * of the encoding and once that first line is converted we may find
2042 * out that a different decoder is needed to process the input.
2043 *
2044 * Returns the number of byte written if success, or
2045 * -1 general error
2046 * -2 if the transcoding fails (for *in is not valid utf8 string or
2047 * the result of transformation can't fit into the encoding we want), or
2048 */
2049int
2050xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2051{
2052 int ret = -2;
2053 size_t written;
2054 size_t toconv;
2055 int c_in;
2056 int c_out;
2057 xmlBufPtr in;
2058 xmlBufPtr out;
2059
2060 if ((input == NULL) || (input->encoder == NULL) ||
2061 (input->buffer == NULL) || (input->raw == NULL))
2062 return (-1);
2063 out = input->buffer;
2064 in = input->raw;
2065
2066 toconv = xmlBufUse(in);
2067 if (toconv == 0)
2068 return (0);
2069 written = xmlBufAvail(out) - 1; /* count '\0' */
2070 /*
2071 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2072 * 45 chars should be sufficient to reach the end of the encoding
2073 * declaration without going too far inside the document content.
2074 * on UTF-16 this means 90bytes, on UCS4 this means 180
2075 * The actual value depending on guessed encoding is passed as @len
2076 * if provided
2077 */
2078 if (len >= 0) {
2079 if (toconv > (unsigned int) len)
2080 toconv = len;
2081 } else {
2082 if (toconv > 180)
2083 toconv = 180;
2084 }
2085 if (toconv * 2 >= written) {
2086 xmlBufGrow(out, toconv * 2);
2087 written = xmlBufAvail(out) - 1;
2088 }
2089 if (written > 360)
2090 written = 360;
2091
2092 c_in = toconv;
2093 c_out = written;
2094 if (input->encoder->input != NULL) {
2095 ret = input->encoder->input(xmlBufEnd(out), &c_out,
2096 xmlBufContent(in), &c_in);
2097 xmlBufShrink(in, c_in);
2098 xmlBufAddLen(out, c_out);
2099 }
2100#ifdef LIBXML_ICONV_ENABLED
2101 else if (input->encoder->iconv_in != NULL) {
2102 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2103 &c_out, xmlBufContent(in), &c_in);
2104 xmlBufShrink(in, c_in);
2105 xmlBufAddLen(out, c_out);
2106 if (ret == -1)
2107 ret = -3;
2108 }
2109#endif /* LIBXML_ICONV_ENABLED */
2110#ifdef LIBXML_ICU_ENABLED
2111 else if (input->encoder->uconv_in != NULL) {
2112 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2113 &c_out, xmlBufContent(in), &c_in);
2114 xmlBufShrink(in, c_in);
2115 xmlBufAddLen(out, c_out);
2116 if (ret == -1)
2117 ret = -3;
2118 }
2119#endif /* LIBXML_ICU_ENABLED */
2120 switch (ret) {
2121 case 0:
2122#ifdef DEBUG_ENCODING
2123 xmlGenericError(xmlGenericErrorContext,
2124 "converted %d bytes to %d bytes of input\n",
2125 c_in, c_out);
2126#endif
2127 break;
2128 case -1:
2129#ifdef DEBUG_ENCODING
2130 xmlGenericError(xmlGenericErrorContext,
2131 "converted %d bytes to %d bytes of input, %d left\n",
2132 c_in, c_out, (int)xmlBufUse(in));
2133#endif
2134 break;
2135 case -3:
2136#ifdef DEBUG_ENCODING
2137 xmlGenericError(xmlGenericErrorContext,
2138 "converted %d bytes to %d bytes of input, %d left\n",
2139 c_in, c_out, (int)xmlBufUse(in));
2140#endif
2141 break;
2142 case -2: {
2143 char buf[50];
2144 const xmlChar *content = xmlBufContent(in);
2145
2146 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2147 content[0], content[1],
2148 content[2], content[3]);
2149 buf[49] = 0;
2150 xmlEncodingErr(XML_I18N_CONV_FAILED,
2151 "input conversion failed due to input error, bytes %s\n",
2152 buf);
2153 }
2154 }
2155 /*
2156 * Ignore when input buffer is not on a boundary
2157 */
2158 if (ret == -3) ret = 0;
2159 if (ret == -1) ret = 0;
2160 return(ret);
2161}
2162
2163/**
2164 * xmlCharEncInput:
2165 * @input: a parser input buffer
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002166 * @flush: try to flush all the raw buffer
Daniel Veillard18d0db22012-07-13 19:51:15 +08002167 *
2168 * Generic front-end for the encoding handler on parser input
2169 *
2170 * Returns the number of byte written if success, or
2171 * -1 general error
2172 * -2 if the transcoding fails (for *in is not valid utf8 string or
2173 * the result of transformation can't fit into the encoding we want), or
2174 */
2175int
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002176xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
Daniel Veillard18d0db22012-07-13 19:51:15 +08002177{
2178 int ret = -2;
2179 size_t written;
2180 size_t toconv;
2181 int c_in;
2182 int c_out;
2183 xmlBufPtr in;
2184 xmlBufPtr out;
2185
2186 if ((input == NULL) || (input->encoder == NULL) ||
2187 (input->buffer == NULL) || (input->raw == NULL))
2188 return (-1);
2189 out = input->buffer;
2190 in = input->raw;
2191
2192 toconv = xmlBufUse(in);
2193 if (toconv == 0)
2194 return (0);
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002195 if ((toconv > 64 * 1024) && (flush == 0))
Daniel Veillard18d0db22012-07-13 19:51:15 +08002196 toconv = 64 * 1024;
2197 written = xmlBufAvail(out);
2198 if (written > 0)
2199 written--; /* count '\0' */
2200 if (toconv * 2 >= written) {
2201 xmlBufGrow(out, toconv * 2);
2202 written = xmlBufAvail(out);
2203 if (written > 0)
2204 written--; /* count '\0' */
2205 }
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002206 if ((written > 128 * 1024) && (flush == 0))
Daniel Veillard18d0db22012-07-13 19:51:15 +08002207 written = 128 * 1024;
2208
2209 c_in = toconv;
2210 c_out = written;
2211 if (input->encoder->input != NULL) {
2212 ret = input->encoder->input(xmlBufEnd(out), &c_out,
2213 xmlBufContent(in), &c_in);
2214 xmlBufShrink(in, c_in);
2215 xmlBufAddLen(out, c_out);
2216 }
2217#ifdef LIBXML_ICONV_ENABLED
2218 else if (input->encoder->iconv_in != NULL) {
2219 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2220 &c_out, xmlBufContent(in), &c_in);
2221 xmlBufShrink(in, c_in);
2222 xmlBufAddLen(out, c_out);
2223 if (ret == -1)
2224 ret = -3;
2225 }
2226#endif /* LIBXML_ICONV_ENABLED */
2227#ifdef LIBXML_ICU_ENABLED
2228 else if (input->encoder->uconv_in != NULL) {
2229 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2230 &c_out, xmlBufContent(in), &c_in);
2231 xmlBufShrink(in, c_in);
2232 xmlBufAddLen(out, c_out);
2233 if (ret == -1)
2234 ret = -3;
2235 }
2236#endif /* LIBXML_ICU_ENABLED */
2237 switch (ret) {
2238 case 0:
2239#ifdef DEBUG_ENCODING
2240 xmlGenericError(xmlGenericErrorContext,
2241 "converted %d bytes to %d bytes of input\n",
2242 c_in, c_out);
2243#endif
2244 break;
2245 case -1:
2246#ifdef DEBUG_ENCODING
2247 xmlGenericError(xmlGenericErrorContext,
2248 "converted %d bytes to %d bytes of input, %d left\n",
2249 c_in, c_out, (int)xmlBufUse(in));
2250#endif
2251 break;
2252 case -3:
2253#ifdef DEBUG_ENCODING
2254 xmlGenericError(xmlGenericErrorContext,
2255 "converted %d bytes to %d bytes of input, %d left\n",
2256 c_in, c_out, (int)xmlBufUse(in));
2257#endif
2258 break;
2259 case -2: {
2260 char buf[50];
2261 const xmlChar *content = xmlBufContent(in);
2262
2263 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2264 content[0], content[1],
2265 content[2], content[3]);
2266 buf[49] = 0;
2267 xmlEncodingErr(XML_I18N_CONV_FAILED,
2268 "input conversion failed due to input error, bytes %s\n",
2269 buf);
2270 }
2271 }
2272 /*
2273 * Ignore when input buffer is not on a boundary
2274 */
2275 if (ret == -3)
2276 ret = 0;
2277 return (c_out? c_out : ret);
2278}
2279
2280/**
Owen Taylor3473f882001-02-23 17:55:21 +00002281 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002282 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002283 * @out: an xmlBuffer for the output.
2284 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002285 *
Owen Taylor3473f882001-02-23 17:55:21 +00002286 * Generic front-end for the encoding handler input function
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002287 *
2288 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002289 * -1 general error
2290 * -2 if the transcoding fails (for *in is not valid utf8 string or
2291 * the result of transformation can't fit into the encoding we want), or
2292 */
2293int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002294xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2295 xmlBufferPtr in)
2296{
Owen Taylor3473f882001-02-23 17:55:21 +00002297 int ret = -2;
2298 int written;
2299 int toconv;
2300
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002301 if (handler == NULL)
2302 return (-1);
2303 if (out == NULL)
2304 return (-1);
2305 if (in == NULL)
2306 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002307
2308 toconv = in->use;
2309 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002310 return (0);
Daniel Veillard69f04562011-08-19 11:05:04 +08002311 written = out->size - out->use -1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00002312 if (toconv * 2 >= written) {
2313 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002314 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002315 }
2316 if (handler->input != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002317 ret = handler->input(&out->content[out->use], &written,
2318 in->content, &toconv);
2319 xmlBufferShrink(in, toconv);
2320 out->use += written;
2321 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002322 }
2323#ifdef LIBXML_ICONV_ENABLED
2324 else if (handler->iconv_in != NULL) {
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002325 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2326 &written, in->content, &toconv);
2327 xmlBufferShrink(in, toconv);
2328 out->use += written;
2329 out->content[out->use] = 0;
2330 if (ret == -1)
2331 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002332 }
2333#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002334#ifdef LIBXML_ICU_ENABLED
2335 else if (handler->uconv_in != NULL) {
2336 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2337 &written, in->content, &toconv);
2338 xmlBufferShrink(in, toconv);
2339 out->use += written;
2340 out->content[out->use] = 0;
2341 if (ret == -1)
2342 ret = -3;
2343 }
2344#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002345 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002346 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002347#ifdef DEBUG_ENCODING
2348 xmlGenericError(xmlGenericErrorContext,
2349 "converted %d bytes to %d bytes of input\n",
2350 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002351#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002352 break;
2353 case -1:
2354#ifdef DEBUG_ENCODING
2355 xmlGenericError(xmlGenericErrorContext,
2356 "converted %d bytes to %d bytes of input, %d left\n",
2357 toconv, written, in->use);
2358#endif
2359 break;
2360 case -3:
2361#ifdef DEBUG_ENCODING
2362 xmlGenericError(xmlGenericErrorContext,
2363 "converted %d bytes to %d bytes of input, %d left\n",
2364 toconv, written, in->use);
2365#endif
2366 break;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002367 case -2: {
2368 char buf[50];
2369
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002370 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002371 in->content[0], in->content[1],
2372 in->content[2], in->content[3]);
2373 buf[49] = 0;
2374 xmlEncodingErr(XML_I18N_CONV_FAILED,
2375 "input conversion failed due to input error, bytes %s\n",
2376 buf);
2377 }
Owen Taylor3473f882001-02-23 17:55:21 +00002378 }
2379 /*
2380 * Ignore when input buffer is not on a boundary
2381 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002382 if (ret == -3)
2383 ret = 0;
Daniel Veillard2644ab22005-08-24 14:22:55 +00002384 return (written? written : ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002385}
2386
Denis Pauke28c8a12013-08-03 14:22:54 +03002387#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00002388/**
Daniel Veillard18d0db22012-07-13 19:51:15 +08002389 * xmlCharEncOutput:
Daniel Veillard28cc42d2012-08-10 10:00:18 +08002390 * @output: a parser output buffer
Daniel Veillard18d0db22012-07-13 19:51:15 +08002391 * @init: is this an initialization call without data
2392 *
2393 * Generic front-end for the encoding handler on parser output
2394 * a first call with @init == 1 has to be made first to initiate the
2395 * output in case of non-stateless encoding needing to initiate their
2396 * state or the output (like the BOM in UTF16).
2397 * In case of UTF8 sequence conversion errors for the given encoder,
2398 * the content will be automatically remapped to a CharRef sequence.
2399 *
2400 * Returns the number of byte written if success, or
2401 * -1 general error
2402 * -2 if the transcoding fails (for *in is not valid utf8 string or
2403 * the result of transformation can't fit into the encoding we want), or
2404 */
2405int
2406xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2407{
2408 int ret = -2;
2409 size_t written;
2410 size_t writtentot = 0;
2411 size_t toconv;
2412 int c_in;
2413 int c_out;
2414 xmlBufPtr in;
2415 xmlBufPtr out;
2416 int charref_len = 0;
2417
2418 if ((output == NULL) || (output->encoder == NULL) ||
2419 (output->buffer == NULL) || (output->conv == NULL))
2420 return (-1);
2421 out = output->conv;
2422 in = output->buffer;
2423
2424retry:
2425
2426 written = xmlBufAvail(out);
2427 if (written > 0)
2428 written--; /* count '\0' */
2429
2430 /*
2431 * First specific handling of the initialization call
2432 */
2433 if (init) {
2434 c_in = 0;
2435 c_out = written;
2436 if (output->encoder->output != NULL) {
2437 ret = output->encoder->output(xmlBufEnd(out), &c_out,
2438 NULL, &c_in);
2439 if (ret > 0) /* Gennady: check return value */
2440 xmlBufAddLen(out, c_out);
2441 }
2442#ifdef LIBXML_ICONV_ENABLED
2443 else if (output->encoder->iconv_out != NULL) {
2444 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2445 &c_out, NULL, &c_in);
2446 xmlBufAddLen(out, c_out);
2447 }
2448#endif /* LIBXML_ICONV_ENABLED */
2449#ifdef LIBXML_ICU_ENABLED
2450 else if (output->encoder->uconv_out != NULL) {
2451 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2452 &c_out, NULL, &c_in);
2453 xmlBufAddLen(out, c_out);
2454 }
2455#endif /* LIBXML_ICU_ENABLED */
2456#ifdef DEBUG_ENCODING
2457 xmlGenericError(xmlGenericErrorContext,
2458 "initialized encoder\n");
2459#endif
2460 return(0);
2461 }
2462
2463 /*
2464 * Conversion itself.
2465 */
2466 toconv = xmlBufUse(in);
2467 if (toconv == 0)
2468 return (0);
2469 if (toconv > 64 * 1024)
2470 toconv = 64 * 1024;
2471 if (toconv * 4 >= written) {
2472 xmlBufGrow(out, toconv * 4);
2473 written = xmlBufAvail(out) - 1;
2474 }
2475 if (written > 256 * 1024)
2476 written = 256 * 1024;
2477
2478 c_in = toconv;
2479 c_out = written;
2480 if (output->encoder->output != NULL) {
2481 ret = output->encoder->output(xmlBufEnd(out), &c_out,
2482 xmlBufContent(in), &c_in);
2483 if (c_out > 0) {
2484 xmlBufShrink(in, c_in);
2485 xmlBufAddLen(out, c_out);
2486 writtentot += c_out;
2487 }
2488 }
2489#ifdef LIBXML_ICONV_ENABLED
2490 else if (output->encoder->iconv_out != NULL) {
2491 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2492 &c_out, xmlBufContent(in), &c_in);
2493 xmlBufShrink(in, c_in);
2494 xmlBufAddLen(out, c_out);
2495 writtentot += c_out;
2496 if (ret == -1) {
2497 if (c_out > 0) {
2498 /*
2499 * Can be a limitation of iconv
2500 */
2501 charref_len = 0;
2502 goto retry;
2503 }
2504 ret = -3;
2505 }
2506 }
2507#endif /* LIBXML_ICONV_ENABLED */
2508#ifdef LIBXML_ICU_ENABLED
2509 else if (output->encoder->uconv_out != NULL) {
2510 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2511 &c_out, xmlBufContent(in), &c_in);
2512 xmlBufShrink(in, c_in);
2513 xmlBufAddLen(out, c_out);
2514 writtentot += c_out;
2515 if (ret == -1) {
2516 if (c_out > 0) {
2517 /*
2518 * Can be a limitation of uconv
2519 */
2520 charref_len = 0;
2521 goto retry;
2522 }
2523 ret = -3;
2524 }
2525 }
2526#endif /* LIBXML_ICU_ENABLED */
2527 else {
2528 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2529 "xmlCharEncOutFunc: no output function !\n", NULL);
2530 return(-1);
2531 }
2532
2533 if (ret >= 0) output += ret;
2534
2535 /*
2536 * Attempt to handle error cases
2537 */
2538 switch (ret) {
2539 case 0:
2540#ifdef DEBUG_ENCODING
2541 xmlGenericError(xmlGenericErrorContext,
2542 "converted %d bytes to %d bytes of output\n",
2543 c_in, c_out);
2544#endif
2545 break;
2546 case -1:
2547#ifdef DEBUG_ENCODING
2548 xmlGenericError(xmlGenericErrorContext,
2549 "output conversion failed by lack of space\n");
2550#endif
2551 break;
2552 case -3:
2553#ifdef DEBUG_ENCODING
2554 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2555 c_in, c_out, (int) xmlBufUse(in));
2556#endif
2557 break;
2558 case -2: {
2559 int len = (int) xmlBufUse(in);
2560 xmlChar *content = xmlBufContent(in);
2561 int cur;
2562
2563 cur = xmlGetUTF8Char(content, &len);
2564 if ((charref_len != 0) && (c_out < charref_len)) {
2565 /*
2566 * We attempted to insert a character reference and failed.
2567 * Undo what was written and skip the remaining charref.
2568 */
2569 xmlBufErase(out, c_out);
2570 writtentot -= c_out;
2571 xmlBufShrink(in, charref_len - c_out);
2572 charref_len = 0;
2573
2574 ret = -1;
2575 break;
2576 } else if (cur > 0) {
2577 xmlChar charref[20];
2578
2579#ifdef DEBUG_ENCODING
2580 xmlGenericError(xmlGenericErrorContext,
2581 "handling output conversion error\n");
2582 xmlGenericError(xmlGenericErrorContext,
2583 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2584 content[0], content[1],
2585 content[2], content[3]);
2586#endif
2587 /*
2588 * Removes the UTF8 sequence, and replace it by a charref
2589 * and continue the transcoding phase, hoping the error
2590 * did not mangle the encoder state.
2591 */
2592 charref_len = snprintf((char *) &charref[0], sizeof(charref),
2593 "&#%d;", cur);
2594 xmlBufShrink(in, len);
2595 xmlBufAddHead(in, charref, -1);
2596
2597 goto retry;
2598 } else {
2599 char buf[50];
2600
2601 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2602 content[0], content[1],
2603 content[2], content[3]);
2604 buf[49] = 0;
2605 xmlEncodingErr(XML_I18N_CONV_FAILED,
2606 "output conversion failed due to conv error, bytes %s\n",
2607 buf);
2608 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2609 content[0] = ' ';
2610 }
2611 break;
2612 }
2613 }
2614 return(ret);
2615}
Denis Pauke28c8a12013-08-03 14:22:54 +03002616#endif
Daniel Veillard18d0db22012-07-13 19:51:15 +08002617
2618/**
Owen Taylor3473f882001-02-23 17:55:21 +00002619 * xmlCharEncOutFunc:
2620 * @handler: char enconding transformation data structure
2621 * @out: an xmlBuffer for the output.
2622 * @in: an xmlBuffer for the input
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002623 *
Owen Taylor3473f882001-02-23 17:55:21 +00002624 * Generic front-end for the encoding handler output function
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002625 * a first call with @in == NULL has to be made firs to initiate the
Owen Taylor3473f882001-02-23 17:55:21 +00002626 * output in case of non-stateless encoding needing to initiate their
2627 * state or the output (like the BOM in UTF16).
2628 * In case of UTF8 sequence conversion errors for the given encoder,
2629 * the content will be automatically remapped to a CharRef sequence.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002630 *
2631 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002632 * -1 general error
2633 * -2 if the transcoding fails (for *in is not valid utf8 string or
2634 * the result of transformation can't fit into the encoding we want), or
2635 */
2636int
2637xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2638 xmlBufferPtr in) {
2639 int ret = -2;
2640 int written;
2641 int writtentot = 0;
2642 int toconv;
2643 int output = 0;
Timothy Elliott689408b2012-05-08 22:03:22 +08002644 int charref_len = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002645
2646 if (handler == NULL) return(-1);
2647 if (out == NULL) return(-1);
2648
2649retry:
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002650
Owen Taylor3473f882001-02-23 17:55:21 +00002651 written = out->size - out->use;
2652
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002653 if (written > 0)
2654 written--; /* Gennady: count '/0' */
2655
Owen Taylor3473f882001-02-23 17:55:21 +00002656 /*
2657 * First specific handling of in = NULL, i.e. the initialization call
2658 */
2659 if (in == NULL) {
2660 toconv = 0;
2661 if (handler->output != NULL) {
2662 ret = handler->output(&out->content[out->use], &written,
2663 NULL, &toconv);
Daniel Veillard8caa9c22003-06-02 13:35:24 +00002664 if (ret >= 0) { /* Gennady: check return value */
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002665 out->use += written;
2666 out->content[out->use] = 0;
2667 }
Owen Taylor3473f882001-02-23 17:55:21 +00002668 }
2669#ifdef LIBXML_ICONV_ENABLED
2670 else if (handler->iconv_out != NULL) {
2671 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2672 &written, NULL, &toconv);
2673 out->use += written;
2674 out->content[out->use] = 0;
2675 }
2676#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002677#ifdef LIBXML_ICU_ENABLED
2678 else if (handler->uconv_out != NULL) {
2679 ret = xmlUconvWrapper(handler->uconv_out, 0,
2680 &out->content[out->use],
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002681 &written, NULL, &toconv);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002682 out->use += written;
2683 out->content[out->use] = 0;
2684 }
2685#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002686#ifdef DEBUG_ENCODING
2687 xmlGenericError(xmlGenericErrorContext,
2688 "initialized encoder\n");
2689#endif
2690 return(0);
2691 }
2692
2693 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002694 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002695 */
2696 toconv = in->use;
2697 if (toconv == 0)
2698 return(0);
Daniel Veillardf1245392008-04-03 09:46:34 +00002699 if (toconv * 4 >= written) {
2700 xmlBufferGrow(out, toconv * 4);
Owen Taylor3473f882001-02-23 17:55:21 +00002701 written = out->size - out->use - 1;
2702 }
2703 if (handler->output != NULL) {
2704 ret = handler->output(&out->content[out->use], &written,
2705 in->content, &toconv);
Daniel Veillarde83e93e2008-08-30 12:52:26 +00002706 if (written > 0) {
2707 xmlBufferShrink(in, toconv);
2708 out->use += written;
2709 writtentot += written;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002710 }
Owen Taylor3473f882001-02-23 17:55:21 +00002711 out->content[out->use] = 0;
2712 }
2713#ifdef LIBXML_ICONV_ENABLED
2714 else if (handler->iconv_out != NULL) {
2715 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2716 &written, in->content, &toconv);
2717 xmlBufferShrink(in, toconv);
2718 out->use += written;
2719 writtentot += written;
2720 out->content[out->use] = 0;
2721 if (ret == -1) {
2722 if (written > 0) {
2723 /*
2724 * Can be a limitation of iconv
2725 */
Timothy Elliott689408b2012-05-08 22:03:22 +08002726 charref_len = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002727 goto retry;
2728 }
2729 ret = -3;
2730 }
2731 }
2732#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002733#ifdef LIBXML_ICU_ENABLED
2734 else if (handler->uconv_out != NULL) {
2735 ret = xmlUconvWrapper(handler->uconv_out, 0,
2736 &out->content[out->use],
2737 &written, in->content, &toconv);
2738 xmlBufferShrink(in, toconv);
2739 out->use += written;
2740 writtentot += written;
2741 out->content[out->use] = 0;
2742 if (ret == -1) {
2743 if (written > 0) {
2744 /*
2745 * Can be a limitation of iconv
2746 */
Timothy Elliott689408b2012-05-08 22:03:22 +08002747 charref_len = 0;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002748 goto retry;
2749 }
2750 ret = -3;
2751 }
2752 }
2753#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002755 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2756 "xmlCharEncOutFunc: no output function !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002757 return(-1);
2758 }
2759
2760 if (ret >= 0) output += ret;
2761
2762 /*
2763 * Attempt to handle error cases
2764 */
2765 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002766 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002767#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002768 xmlGenericError(xmlGenericErrorContext,
2769 "converted %d bytes to %d bytes of output\n",
2770 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002771#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002772 break;
2773 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002774#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002775 xmlGenericError(xmlGenericErrorContext,
2776 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002777#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002778 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002779 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002780#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002781 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2782 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002783#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002784 break;
2785 case -2: {
2786 int len = in->use;
2787 const xmlChar *utf = (const xmlChar *) in->content;
2788 int cur;
2789
2790 cur = xmlGetUTF8Char(utf, &len);
Timothy Elliott689408b2012-05-08 22:03:22 +08002791 if ((charref_len != 0) && (written < charref_len)) {
2792 /*
2793 * We attempted to insert a character reference and failed.
2794 * Undo what was written and skip the remaining charref.
2795 */
2796 out->use -= written;
2797 writtentot -= written;
2798 xmlBufferShrink(in, charref_len - written);
2799 charref_len = 0;
2800
2801 ret = -1;
2802 break;
2803 } else if (cur > 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00002804 xmlChar charref[20];
2805
2806#ifdef DEBUG_ENCODING
2807 xmlGenericError(xmlGenericErrorContext,
2808 "handling output conversion error\n");
2809 xmlGenericError(xmlGenericErrorContext,
2810 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2811 in->content[0], in->content[1],
2812 in->content[2], in->content[3]);
2813#endif
2814 /*
2815 * Removes the UTF8 sequence, and replace it by a charref
2816 * and continue the transcoding phase, hoping the error
2817 * did not mangle the encoder state.
2818 */
Timothy Elliott689408b2012-05-08 22:03:22 +08002819 charref_len = snprintf((char *) &charref[0], sizeof(charref),
2820 "&#%d;", cur);
Owen Taylor3473f882001-02-23 17:55:21 +00002821 xmlBufferShrink(in, len);
2822 xmlBufferAddHead(in, charref, -1);
2823
2824 goto retry;
2825 } else {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002826 char buf[50];
2827
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002828 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002829 in->content[0], in->content[1],
2830 in->content[2], in->content[3]);
2831 buf[49] = 0;
2832 xmlEncodingErr(XML_I18N_CONV_FAILED,
2833 "output conversion failed due to conv error, bytes %s\n",
2834 buf);
Daniel Veillarddf750622006-05-02 12:24:06 +00002835 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2836 in->content[0] = ' ';
Owen Taylor3473f882001-02-23 17:55:21 +00002837 }
2838 break;
2839 }
2840 }
2841 return(ret);
2842}
2843
2844/**
2845 * xmlCharEncCloseFunc:
2846 * @handler: char enconding transformation data structure
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002847 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002848 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002849 *
2850 * Returns 0 if success, or -1 in case of error
2851 */
2852int
2853xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2854 int ret = 0;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002855 int tofree = 0;
Gaurav080a22c2013-11-29 23:10:50 +08002856 int i, handler_in_list = 0;
2857
Owen Taylor3473f882001-02-23 17:55:21 +00002858 if (handler == NULL) return(-1);
2859 if (handler->name == NULL) return(-1);
Gaurav080a22c2013-11-29 23:10:50 +08002860 if (handlers != NULL) {
2861 for (i = 0;i < nbCharEncodingHandler; i++) {
2862 if (handler == handlers[i]) {
2863 handler_in_list = 1;
2864 break;
2865 }
2866 }
2867 }
Owen Taylor3473f882001-02-23 17:55:21 +00002868#ifdef LIBXML_ICONV_ENABLED
2869 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002870 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002871 * and the associated icon resources.
2872 */
Gaurav080a22c2013-11-29 23:10:50 +08002873 if ((handler_in_list == 0) &&
2874 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002875 tofree = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 if (handler->iconv_out != NULL) {
2877 if (iconv_close(handler->iconv_out))
2878 ret = -1;
2879 handler->iconv_out = NULL;
2880 }
2881 if (handler->iconv_in != NULL) {
2882 if (iconv_close(handler->iconv_in))
2883 ret = -1;
2884 handler->iconv_in = NULL;
2885 }
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002888#ifdef LIBXML_ICU_ENABLED
Gaurav080a22c2013-11-29 23:10:50 +08002889 if ((handler_in_list == 0) &&
2890 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002891 tofree = 1;
2892 if (handler->uconv_out != NULL) {
2893 closeIcuConverter(handler->uconv_out);
2894 handler->uconv_out = NULL;
2895 }
2896 if (handler->uconv_in != NULL) {
2897 closeIcuConverter(handler->uconv_in);
2898 handler->uconv_in = NULL;
2899 }
2900 }
2901#endif
2902 if (tofree) {
2903 /* free up only dynamic handlers iconv/uconv */
2904 if (handler->name != NULL)
2905 xmlFree(handler->name);
2906 handler->name = NULL;
2907 xmlFree(handler);
2908 }
Owen Taylor3473f882001-02-23 17:55:21 +00002909#ifdef DEBUG_ENCODING
2910 if (ret)
2911 xmlGenericError(xmlGenericErrorContext,
2912 "failed to close the encoding handler\n");
2913 else
2914 xmlGenericError(xmlGenericErrorContext,
2915 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002916#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002917
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(ret);
2919}
2920
Daniel Veillard36711902004-02-11 13:25:26 +00002921/**
2922 * xmlByteConsumed:
2923 * @ctxt: an XML parser context
2924 *
2925 * This function provides the current index of the parser relative
2926 * to the start of the current entity. This function is computed in
2927 * bytes from the beginning starting at zero and finishing at the
2928 * size in byte of the file if parsing a file. The function is
2929 * of constant cost if the input is UTF-8 but can be costly if run
2930 * on non-UTF-8 input.
2931 *
2932 * Returns the index in bytes from the beginning of the entity or -1
2933 * in case the index could not be computed.
2934 */
2935long
2936xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2937 xmlParserInputPtr in;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002938
Daniel Veillard36711902004-02-11 13:25:26 +00002939 if (ctxt == NULL) return(-1);
2940 in = ctxt->input;
2941 if (in == NULL) return(-1);
2942 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2943 unsigned int unused = 0;
2944 xmlCharEncodingHandler * handler = in->buf->encoder;
2945 /*
2946 * Encoding conversion, compute the number of unused original
2947 * bytes from the input not consumed and substract that from
2948 * the raw consumed value, this is not a cheap operation
2949 */
2950 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002951 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002952 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002953 int toconv = in->end - in->cur, written = 32000;
2954
2955 int ret;
2956
2957 if (handler->output != NULL) {
2958 do {
2959 toconv = in->end - cur;
2960 written = 32000;
2961 ret = handler->output(&convbuf[0], &written,
2962 cur, &toconv);
2963 if (ret == -1) return(-1);
2964 unused += written;
2965 cur += toconv;
2966 } while (ret == -2);
2967#ifdef LIBXML_ICONV_ENABLED
2968 } else if (handler->iconv_out != NULL) {
2969 do {
2970 toconv = in->end - cur;
2971 written = 32000;
2972 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2973 &written, cur, &toconv);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00002974 if (ret < 0) {
Daniel Veillard36711902004-02-11 13:25:26 +00002975 if (written > 0)
2976 ret = -2;
2977 else
2978 return(-1);
2979 }
2980 unused += written;
2981 cur += toconv;
2982 } while (ret == -2);
2983#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002984#ifdef LIBXML_ICU_ENABLED
2985 } else if (handler->uconv_out != NULL) {
2986 do {
2987 toconv = in->end - cur;
2988 written = 32000;
2989 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2990 &written, cur, &toconv);
2991 if (ret < 0) {
2992 if (written > 0)
2993 ret = -2;
2994 else
2995 return(-1);
2996 }
2997 unused += written;
2998 cur += toconv;
2999 } while (ret == -2);
3000#endif
Daniel Veillard36711902004-02-11 13:25:26 +00003001 } else {
3002 /* could not find a converter */
3003 return(-1);
3004 }
3005 }
3006 if (in->buf->rawconsumed < unused)
3007 return(-1);
3008 return(in->buf->rawconsumed - unused);
3009 }
3010 return(in->consumed + (in->cur - in->base));
3011}
3012
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003013#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003014#ifdef LIBXML_ISO8859X_ENABLED
3015
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops early, without error, once @out
 * is full.
 *
 * @xlattable starts with a 48 bytes index (32 entries selected by the low
 * 5 bits of a 2-byte lead, then 16 entries selected by the low 4 bits of
 * a 3-byte lead) followed by blocks of 64 bytes indexed by the low 6 bits
 * of a trailing byte. A final value of 0 means "not in the character set".
 *
 * Returns the number of bytes written if success, 0 for the
 *     initialization call (@in == NULL), -2 if the transcoding fails,
 *     -3 if the input ends in the middle of a sequence, or -1 if a
 *     mandatory argument is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every character yields exactly one byte: stop when @out is full */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        processed = in;
    }

done:
    /* only fully converted characters are reported as consumed */
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
3134
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points of the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * has no room left for the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or an undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: requires room for a 3 bytes sequence. The test is done
     * on the remaining size so that no pointer before @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* ASCII run: bounded by both the input left and the output left */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* check the bound before reading to never look past the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two more ASCII bytes still fit in the space the loop kept free */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3200
Daniel Veillard1cc912e2010-11-03 19:26:35 +01003201
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003202/************************************************************************
3203 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3204 ************************************************************************/
3205
/*
 * ISO-8859-2: 128 16-bit values, one per byte value 0x80..0xFF.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as its unicodetable
 * argument, which indexes it with (byte - 0x80) and treats a 0 entry as an
 * undefined code point -- the call site is not visible here, confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3224
/*
 * ISO-8859-2: 48 bytes followed by 6 blocks of 64 bytes.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as its xlattable
 * argument, which reads the first 48 bytes as block selectors and the
 * 64-byte blocks through the low 6 bits of a trailing UTF-8 byte, a final
 * 0 meaning "not in character set" -- the call site is not visible here,
 * confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3254
/*
 * ISO-8859-3: 128 16-bit values, one per byte value 0x80..0xFF; several
 * entries are 0x0000.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as its unicodetable
 * argument, which indexes it with (byte - 0x80) and treats a 0 entry as an
 * undefined code point -- the call site is not visible here, confirm.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3273
/*
 * ISO-8859-3: 48 bytes followed by 7 blocks of 64 bytes.
 * NOTE(review): presumably handed to UTF8ToISO8859x() as its xlattable
 * argument, which reads the first 48 bytes as block selectors and the
 * 64-byte blocks through the low 6 bits of a trailing UTF-8 byte, a final
 * 0 meaning "not in character set" -- the call site is not visible here,
 * confirm.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3307
3308static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003309 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3310 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3311 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3312 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3313 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3314 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3315 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3316 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3317 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3318 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3319 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3320 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3321 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3322 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3323 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3324 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003325};
3326
3327static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3328 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3336 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3337 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3338 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3339 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3340 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3341 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3342 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3343 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3344 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3345 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3346 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3347 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3349 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3352 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3353 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3354 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3355};
3356
3357static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003358 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3359 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3360 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3361 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3362 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3363 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3364 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3365 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3366 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3367 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3368 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3369 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3370 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3371 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3372 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3373 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003374};
3375
3376static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3377 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3385 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3386 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3389 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3390 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3391 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3392 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3393 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404};
3405
3406static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003407 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3408 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3409 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3410 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3411 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3412 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3413 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3414 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3415 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3416 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3417 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3418 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3419 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3420 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3421 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3422 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003423};
3424
3425static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3426 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3434 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3435 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3436 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3443 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3444 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3445 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3446 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449};
3450
3451static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003452 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3453 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3454 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3455 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3456 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3457 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3458 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3459 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3460 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3461 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3462 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3463 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3464 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3465 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3466 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3467 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003468};
3469
3470static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3471 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3479 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3480 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3481 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3482 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3495 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3496 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3497 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3498 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3499 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3500 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3502};
3503
3504static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003505 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3506 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3507 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3508 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3509 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3510 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3511 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3512 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3513 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3514 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3515 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3516 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3517 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3518 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3519 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3520 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003521};
3522
3523static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3524 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3532 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3533 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3534 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3535 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3543 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3548 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3549 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3553 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3554 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555};
3556
3557static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003558 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3559 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3560 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3561 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3562 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3563 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3564 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3565 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3566 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3567 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3568 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3569 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3570 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3571 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3572 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3573 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003574};
3575
3576static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3577 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3585 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3586 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3587 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3588 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3589 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3590 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3591 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3595 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600};
3601
3602static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003603 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3604 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3605 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3606 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3607 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3608 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3609 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3610 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3611 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3612 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3613 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3614 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3615 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3616 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3617 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3618 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003619};
3620
3621static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3622 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3630 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3631 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3632 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3634 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3636 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3637 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3640 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3641 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3650 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3651 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3652 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3653};
3654
3655static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003656 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3657 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3658 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3659 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3660 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3661 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3662 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3663 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3664 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3665 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3666 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3667 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3668 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3669 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3670 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3671 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003672};
3673
3674static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3675 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3683 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3684 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3690 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3691 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3692 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3693 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3694 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3699 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702};
3703
3704static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003705 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3706 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3707 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3708 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3709 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3710 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3711 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3712 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3713 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3714 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3715 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3716 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3717 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3718 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3719 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3720 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003721};
3722
3723static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3724 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3732 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3733 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3734 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3735 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3744 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3745 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3746 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3747 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3748 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3749 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3750 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3751 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3752 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3753 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3754 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3755};
3756
/*
 * xmlunicodetable_ISO8859_14:
 *
 * Unicode code points for the upper half of ISO-8859-14.  Entry i holds
 * the code point for byte 0x80 + i (the first 32 entries are the identity
 * mapping 0x0080..0x009f).  Passed to ISO8859xToUTF8 by ISO8859_14ToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    /* 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    /* 0xb0 */
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    /* 0xc0 */
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* 0xd0 */
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    /* 0xe0 */
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* 0xf0 */
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3775
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Reverse (UTF-8 -> ISO-8859-14) lookup table handed to UTF8ToISO8859x by
 * UTF8ToISO8859_14.  The declared size is a 48-byte header followed by
 * ten 64-byte blocks; a zero data byte means "no mapping".
 *
 * NOTE(review): the block labels below are inferred from the declared
 * size and from cross-checking every non-zero byte against
 * xmlunicodetable_ISO8859_14.  The authoritative reader is
 * UTF8ToISO8859x, which lies outside this chunk -- confirm there.
 * The header appears to hold block numbers: bytes 0..31 indexed by the
 * low 5 bits of a 2-byte lead, bytes 32..47 by the low 4 bits of a
 * 3-byte lead.
 *
 * The initializer fills the array exactly, so the string literal's
 * terminating NUL is not stored (valid C, not valid C++).
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    /* header */
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero (unmapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 2: second-level index for the U+1xxx range */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    /* block 3: U+1E00..U+1E3F */
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4: U+0100..U+013F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5: U+1E80..U+1EBF */
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6: U+1EC0..U+1EFF */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7: U+0140..U+017F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    /* block 8: U+1E40..U+1E7F */
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 9: U+00C0..U+00FF */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3821
/*
 * xmlunicodetable_ISO8859_15:
 *
 * Unicode code points for the upper half of ISO-8859-15.  Entry i holds
 * the code point for byte 0x80 + i.  Only eight entries differ from the
 * identity mapping (0xa4, 0xa6, 0xa8, 0xb4, 0xb8, 0xbc, 0xbd, 0xbe).
 * Passed to ISO8859xToUTF8 by ISO8859_15ToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    /* 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    /* 0xb0 */
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    /* 0xc0 */
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* 0xd0 */
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    /* 0xe0 */
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* 0xf0 */
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3840
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Reverse (UTF-8 -> ISO-8859-15) lookup table handed to UTF8ToISO8859x by
 * UTF8ToISO8859_15.  The declared size is a 48-byte header followed by
 * six 64-byte blocks; a zero data byte means "no mapping".
 *
 * NOTE(review): the block labels below are inferred from the declared
 * size and from cross-checking every non-zero byte against
 * xmlunicodetable_ISO8859_15.  The authoritative reader is
 * UTF8ToISO8859x, which lies outside this chunk -- confirm there.
 *
 * The initializer fills the array exactly, so the string literal's
 * terminating NUL is not stored (valid C, not valid C++).
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    /* header */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero (unmapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* block 2: second-level index for the U+2xxx range */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3: U+2080..U+20BF (euro sign) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4: U+0140..U+017F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* block 5: U+00C0..U+00FF */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3870
/*
 * xmlunicodetable_ISO8859_16:
 *
 * Unicode code points for the upper half of ISO-8859-16.  Entry i holds
 * the code point for byte 0x80 + i (the first 32 entries are the identity
 * mapping 0x0080..0x009f).  Passed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    /* 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    /* 0xb0 */
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    /* 0xc0 */
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* 0xd0 */
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    /* 0xe0 */
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* 0xf0 */
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3889
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Reverse (UTF-8 -> ISO-8859-16) lookup table handed to UTF8ToISO8859x by
 * UTF8ToISO8859_16.  The declared size is a 48-byte header followed by
 * nine 64-byte blocks; a zero data byte means "no mapping".
 *
 * NOTE(review): the block labels below are inferred from the declared
 * size and from cross-checking every non-zero byte against
 * xmlunicodetable_ISO8859_16.  The authoritative reader is
 * UTF8ToISO8859x, which lies outside this chunk -- confirm there.
 *
 * The initializer fills the array exactly, so the string literal's
 * terminating NUL is not stored (valid C, not valid C++).
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* header */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0: all zero (unmapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* block 2: U+0100..U+013F */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3: U+0140..U+017F */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* block 4: second-level index for the U+2xxx range */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5: U+2080..U+20BF (euro sign) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6: U+2000..U+203F (quotation marks) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7: U+0200..U+023F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 8: U+00C0..U+00FF */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3931
3932
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 * (no handler is generated for ISO-8859-12)
 */
3936
3937static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3938 const unsigned char* in, int *inlen) {
3939 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3940}
3941static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3942 const unsigned char* in, int *inlen) {
3943 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3944}
3945
3946static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3947 const unsigned char* in, int *inlen) {
3948 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3949}
3950static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3951 const unsigned char* in, int *inlen) {
3952 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3953}
3954
3955static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3956 const unsigned char* in, int *inlen) {
3957 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3958}
3959static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3960 const unsigned char* in, int *inlen) {
3961 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3962}
3963
3964static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3965 const unsigned char* in, int *inlen) {
3966 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3967}
3968static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3969 const unsigned char* in, int *inlen) {
3970 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3971}
3972
3973static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3974 const unsigned char* in, int *inlen) {
3975 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3976}
3977static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3978 const unsigned char* in, int *inlen) {
3979 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3980}
3981
3982static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3983 const unsigned char* in, int *inlen) {
3984 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3985}
3986static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3987 const unsigned char* in, int *inlen) {
3988 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3989}
3990
3991static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3992 const unsigned char* in, int *inlen) {
3993 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3994}
3995static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3996 const unsigned char* in, int *inlen) {
3997 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3998}
3999
4000static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
4001 const unsigned char* in, int *inlen) {
4002 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
4003}
4004static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
4005 const unsigned char* in, int *inlen) {
4006 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
4007}
4008
4009static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
4010 const unsigned char* in, int *inlen) {
4011 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
4012}
4013static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
4014 const unsigned char* in, int *inlen) {
4015 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
4016}
4017
4018static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
4019 const unsigned char* in, int *inlen) {
4020 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
4021}
4022static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
4023 const unsigned char* in, int *inlen) {
4024 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
4025}
4026
4027static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
4028 const unsigned char* in, int *inlen) {
4029 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
4030}
4031static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
4032 const unsigned char* in, int *inlen) {
4033 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
4034}
4035
4036static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
4037 const unsigned char* in, int *inlen) {
4038 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
4039}
4040static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
4041 const unsigned char* in, int *inlen) {
4042 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
4043}
4044
4045static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
4046 const unsigned char* in, int *inlen) {
4047 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
4048}
4049static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
4050 const unsigned char* in, int *inlen) {
4051 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
4052}
4053
4054static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
4055 const unsigned char* in, int *inlen) {
4056 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
4057}
4058static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
4059 const unsigned char* in, int *inlen) {
4060 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
4061}
4062
4063static void
4064xmlRegisterCharEncodingHandlersISO8859x (void) {
4065 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
4066 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
4067 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
4068 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
4069 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
4070 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
4071 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
4072 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
4073 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
4074 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
4075 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
4076 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
4077 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
4078 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
4079}
4080
4081#endif
4082#endif
4083
Daniel Veillard5d4644e2005-04-01 13:11:58 +00004084#define bottom_encoding
4085#include "elfgcchack.h"