/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22
Daniel Veillard34ce8be2002-03-18 19:37:11 +000023#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000024#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000025
Owen Taylor3473f882001-02-23 17:55:21 +000026#include <string.h>
Daniel Veillard18d0db22012-07-13 19:51:15 +080027#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
29#ifdef HAVE_CTYPE_H
30#include <ctype.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef LIBXML_ICONV_ENABLED
36#ifdef HAVE_ERRNO_H
37#include <errno.h>
38#endif
39#endif
40#include <libxml/encoding.h>
41#include <libxml/xmlmemory.h>
42#ifdef LIBXML_HTML_ENABLED
43#include <libxml/HTMLparser.h>
44#endif
Daniel Veillard64a411c2001-10-15 12:32:07 +000045#include <libxml/globals.h>
Daniel Veillarda4617b82001-11-04 20:19:12 +000046#include <libxml/xmlerror.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047
Daniel Veillard18d0db22012-07-13 19:51:15 +080048#include "buf.h"
49#include "enc.h"
50
Daniel Veillard22090732001-07-16 00:06:07 +000051static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000053
54typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56struct _xmlCharEncodingAlias {
57 const char *name;
58 const char *alias;
59};
60
61static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62static int xmlCharEncodingAliasesNb = 0;
63static int xmlCharEncodingAliasesMax = 0;
64
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else /* neither LIBXML_ICONV_ENABLED nor LIBXML_ICU_ENABLED */
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif /* LIBXML_ICONV_ENABLED || LIBXML_ICU_ENABLED */

/*
 * Host byte-order flag; starts out as 1.
 * NOTE(review): presumably corrected by a run-time probe elsewhere in the
 * file -- confirm.
 */
static int xmlLittleEndian = 1;
76
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000077/**
78 * xmlEncodingErrMemory:
79 * @extra: extra informations
80 *
81 * Handle an out of memory condition
82 */
83static void
84xmlEncodingErrMemory(const char *extra)
85{
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87}
88
89/**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
93 *
94 * n encoding error
95 */
David Kilzer4472c3a2016-05-13 15:13:17 +080096static void LIBXML_ATTR_FORMAT(2,0)
Daniel Veillard1fc3ed02005-08-24 12:46:09 +000097xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98{
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102}
Daniel Veillard97ac1312001-05-30 19:14:17 +0000103
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100104#ifdef LIBXML_ICU_ENABLED
105static uconv_t*
106openIcuConverter(const char* name, int toUnicode)
107{
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
112
113 conv->uconv = ucnv_open(name, &status);
114 if (U_FAILURE(status))
115 goto error;
116
117 status = U_ZERO_ERROR;
118 if (toUnicode) {
119 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
121 }
122 else {
123 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124 NULL, NULL, NULL, &status);
125 }
126 if (U_FAILURE(status))
127 goto error;
128
129 status = U_ZERO_ERROR;
130 conv->utf8 = ucnv_open("UTF-8", &status);
131 if (U_SUCCESS(status))
132 return conv;
133
134error:
135 if (conv->uconv)
136 ucnv_close(conv->uconv);
137 xmlFree(conv);
138 return NULL;
139}
140
141static void
142closeIcuConverter(uconv_t *conv)
143{
144 if (conv != NULL) {
145 ucnv_close(conv->uconv);
146 ucnv_close(conv->utf8);
147 xmlFree(conv);
148 }
149}
150#endif /* LIBXML_ICU_ENABLED */
151
/************************************************************************
 *                                                                      *
 *              Conversions To/From UTF8 encoding                       *
 *                                                                      *
 ************************************************************************/
157
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII bytes are valid UTF-8 as they are, so this is
 * a checked byte copy that uses every byte of @out.
 *
 * Returns the number of bytes written if success, -2 if a byte outside
 *     the ASCII range (>= 0x80) is met, or -1 if @out, @outlen or @inlen
 *     is NULL. A NULL @in is treated as empty input and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On -2 both describe the valid prefix converted before the bad byte.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* const outstart = out;
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* outend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);

    /* One input byte yields exactly one output byte: no slack is needed. */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report a transcoding failure, not lack of space */
            *outlen = out - outstart;
            *inlen = in - instart;
            return(-2);
        }
        *out++ = *in++;
    }

    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
202
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000203#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Only single-byte sequences can be represented in ASCII. Any complete
 * multi-byte sequence is therefore rejected, whether it is well formed
 * (a character above 0x7F), malformed (bad continuation byte) or an
 * overlong encoding of an ASCII character. A multi-byte sequence that is
 * cut off by the end of @in is left unconsumed so the caller can supply
 * the rest later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On -2 both describe the prefix converted before the failing sequence.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* const outstart = out;
    const unsigned char* outend;
    unsigned int lead;
    int trailing;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);

    /* `in` always points at the first byte that has not been consumed. */
    while (in < inend) {
        lead = *in;
        if (lead < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) lead;
            in++;
            continue;
        }

        if ((lead >= 0xC0) && (lead < 0xF8)) {
            /* plausible lead byte: is the whole sequence available yet? */
            trailing = (lead < 0xE0) ? 1 : ((lead < 0xF0) ? 2 : 3);
            if (inend - (in + 1) < trailing)
                break;
        }

        /* stray trailing byte, bad lead byte, or a non-ASCII sequence */
        ret = -2;
        break;
    }

    *outlen = out - outstart;
    *inlen = in - instart;
    return((ret < 0) ? ret : *outlen);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000287#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000288
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, bytes from 0x80 up
 * become a two-byte sequence. Conversion stops at the first character
 * that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* const outstart = out;
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* outend;
    unsigned int c;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * Room is always tested as a size (outend - out), so no pointer
     * before the start of @out is formed when *outlen is 0.
     */
    while (in < inend) {
        c = *in;
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in++;
    }

    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
337
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the byte count to copy is negative. A NULL @inb is an
 *     initialization call and returns 0.
 * The value of *@inlenb and *@outlen after return is the number of
 * octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy the smaller of the two lengths */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
379
Daniel Veillarde72c7562002-05-31 09:47:30 +0000380
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000381#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A sequence cut off by the end of @in is left unconsumed so the caller
 * can supply the rest later. Stray or missing continuation bytes, lead
 * bytes 0xF8 and above, overlong encodings and characters above 0xFF are
 * transcoding failures.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call and returns 0.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On -2 both describe the prefix converted before the failing sequence.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    /* smallest value that legitimately needs N trailing bytes */
    static const unsigned int minValue[4] = { 0, 0x80, 0x800, 0x10000 };
    const unsigned char* const instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned char* const outstart = out;
    const unsigned char* outend;
    unsigned int c, d;
    int trailing, i;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;   /* no chance for this in IsoLat1 */

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for (i = 0; i < trailing; i++) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* overlong form, or a character IsoLat1 cannot hold */
        if ((c < minValue[trailing]) || (c > 0xFF))
            goto invalid;

        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000471#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000472
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte (low byte first), so
 * the result does not depend on the alignment of @inb or on the byte order
 * of the host.
 *
 * A character is only consumed when its whole UTF-8 form fits in @out.
 * An odd trailing byte, or a high surrogate whose partner has not arrived
 * yet, is left unconsumed so the caller can supply the rest later.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 * On -2 both describe the prefix converted before the failing unit.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    /* lead byte marker for a UTF-8 sequence of N bytes (N = 2..4) */
    static const unsigned char leadMark[5] = { 0, 0, 0xC0, 0xE0, 0xF0 };
    unsigned char* const outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;      /* first byte not consumed yet */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need, bits;

    if ((out == NULL) || (outlen == NULL) ||
        (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* pair split across chunks */
            d = next[0] | ((unsigned int) next[1] << 8);
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            next += 2;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is a single UCS-4 value: how many UTF-8 bytes does it take? */
        if      (c <    0x80)  need = 1;
        else if (c <   0x800)  need = 2;
        else if (c < 0x10000)  need = 3;
        else                   need = 4;

        /* never emit a truncated sequence */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else {
            bits = (need - 1) * 6;
            *out++ = (unsigned char) ((c >> bits) | leadMark[need]);
            while (bits > 0) {
                bits -= 6;
                *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
            }
        }
        in = next;
    }

    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
560
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000561#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written byte by byte (low byte first),
 * so the result does not depend on the alignment of @outb or on the byte
 * order of the host.
 *
 * A character is only consumed when all of its code units fit in @outb.
 * A sequence cut off by the end of @in is left unconsumed. Stray or
 * missing continuation bytes, lead bytes 0xF8 and above, overlong
 * encodings and values above 0x10FFFF are transcoding failures.
 * Surrogate code points are not checked and pass through unchanged.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed. A NULL @in is an
 *     initialization call and returns 0.
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 * On -2 both describe the prefix converted before the failing sequence.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* smallest value that legitimately needs N trailing bytes */
    static const unsigned int minValue[4] = { 0, 0x80, 0x800, 0x10000 };
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* const instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing, i;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;   /* no chance for this in UTF-16 */

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for (i = 0; i < trailing; i++) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* overlong form, or beyond the range UTF-16 can express */
        if ((c < minValue[trailing]) || (c >= 0x110000))
            goto invalid;

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
670
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. When @in is NULL (initialization call) the
 * UTF-16LE Byte Order Mark FF FE is written if @outb has room for it;
 * otherwise the work is delegated to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or whatever UTF8ToUTF16LE() returns for a non-NULL @in.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    int written = 0;

    /* the initialization path below dereferences all three */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen >= 2) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
        written = 2;
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "Added FFFE Byte Order Mark\n");
#endif
    }
    *outlen = written;
    *inlen = 0;
    return(written);
}
William M. Brack030a7a12004-02-10 12:48:57 +0000709#endif /* LIBXML_OUTPUT_ENABLED */
William M. Brackf9415e42003-11-28 09:39:10 +0000710
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte (high byte first), so
 * the result does not depend on the alignment of @inb or on the byte order
 * of the host.
 *
 * A character is only consumed when its whole UTF-8 form fits in @out, so
 * no truncated sequence is ever written. An odd trailing byte, or a high
 * surrogate whose partner has not arrived yet, is left unconsumed so the
 * caller can supply the rest later.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 * On -2 both describe the prefix converted before the failing unit.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    /* lead byte marker for a UTF-8 sequence of N bytes (N = 2..4) */
    static const unsigned char leadMark[5] = { 0, 0, 0xC0, 0xE0, 0xF0 };
    unsigned char* const outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;      /* first byte not consumed yet */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need, bits;

    if ((out == NULL) || (outlen == NULL) ||
        (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* pair split across chunks */
            d = ((unsigned int) next[0] << 8) | next[1];
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            next += 2;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is a single UCS-4 value: how many UTF-8 bytes does it take? */
        if      (c <    0x80)  need = 1;
        else if (c <   0x800)  need = 2;
        else if (c < 0x10000)  need = 3;
        else                   need = 4;

        /* never emit a truncated sequence */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else {
            bits = (need - 1) * 6;
            *out++ = (unsigned char) ((c >> bits) | leadMark[need]);
            while (bits > 0) {
                bits -= 6;
                *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
            }
        }
        in = next;
    }

    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
802
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000803#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte (high byte
 * first), so the result does not depend on the alignment of @outb or on
 * the byte order of the host.
 *
 * A character is only consumed when all of its code units fit in @outb.
 * A sequence cut off by the end of @in is left unconsumed. Stray or
 * missing continuation bytes, lead bytes 0xF8 and above, overlong
 * encodings and values above 0x10FFFF are transcoding failures.
 * Surrogate code points are not checked and pass through unchanged.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed. A NULL @in is an
 *     initialization call and returns 0.
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced
 * (always counted in bytes, also on failure).
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* smallest value that legitimately needs N trailing bytes */
    static const unsigned int minValue[4] = { 0, 0x80, 0x800, 0x10000 };
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* const instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing, i;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;   /* no chance for this in UTF-16 */

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for (i = 0; i < trailing; i++) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* overlong form, or beyond the range UTF-16 can express */
        if ((c < minValue[trailing]) || (c >= 0x110000))
            goto invalid;

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000909#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +0000910
Daniel Veillard97ac1312001-05-30 19:14:17 +0000911/************************************************************************
912 * *
913 * Generic encoding handling routines *
914 * *
915 ************************************************************************/
916
Owen Taylor3473f882001-02-23 17:55:21 +0000917/**
918 * xmlDetectCharEncoding:
919 * @in: a pointer to the first bytes of the XML entity, must be at least
William M. Brackf9415e42003-11-28 09:39:10 +0000920 * 2 bytes long (at least 4 if encoding is UTF4 variant).
Owen Taylor3473f882001-02-23 17:55:21 +0000921 * @len: pointer to the length of the buffer
922 *
923 * Guess the encoding of the entity using the first bytes of the entity content
William M. Brackf9415e42003-11-28 09:39:10 +0000924 * according to the non-normative appendix F of the XML-1.0 recommendation.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100925 *
Owen Taylor3473f882001-02-23 17:55:21 +0000926 * Returns one of the XML_CHAR_ENCODING_... values.
927 */
928xmlCharEncoding
929xmlDetectCharEncoding(const unsigned char* in, int len)
930{
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100931 if (in == NULL)
Daniel Veillardce682bc2004-11-05 17:22:25 +0000932 return(XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +0000933 if (len >= 4) {
934 if ((in[0] == 0x00) && (in[1] == 0x00) &&
935 (in[2] == 0x00) && (in[3] == 0x3C))
936 return(XML_CHAR_ENCODING_UCS4BE);
937 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
938 (in[2] == 0x00) && (in[3] == 0x00))
939 return(XML_CHAR_ENCODING_UCS4LE);
940 if ((in[0] == 0x00) && (in[1] == 0x00) &&
941 (in[2] == 0x3C) && (in[3] == 0x00))
942 return(XML_CHAR_ENCODING_UCS4_2143);
943 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
944 (in[2] == 0x00) && (in[3] == 0x00))
945 return(XML_CHAR_ENCODING_UCS4_3412);
946 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
947 (in[2] == 0xA7) && (in[3] == 0x94))
948 return(XML_CHAR_ENCODING_EBCDIC);
949 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
950 (in[2] == 0x78) && (in[3] == 0x6D))
951 return(XML_CHAR_ENCODING_UTF8);
William M. Brackf9415e42003-11-28 09:39:10 +0000952 /*
953 * Although not part of the recommendation, we also
954 * attempt an "auto-recognition" of UTF-16LE and
955 * UTF-16BE encodings.
956 */
957 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
958 (in[2] == 0x3F) && (in[3] == 0x00))
959 return(XML_CHAR_ENCODING_UTF16LE);
960 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
961 (in[2] == 0x00) && (in[3] == 0x3F))
962 return(XML_CHAR_ENCODING_UTF16BE);
Owen Taylor3473f882001-02-23 17:55:21 +0000963 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000964 if (len >= 3) {
965 /*
966 * Errata on XML-1.0 June 20 2001
967 * We now allow an UTF8 encoded BOM
968 */
969 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
970 (in[2] == 0xBF))
971 return(XML_CHAR_ENCODING_UTF8);
972 }
William M. Brackf9415e42003-11-28 09:39:10 +0000973 /* For UTF-16 we can recognize by the BOM */
Owen Taylor3473f882001-02-23 17:55:21 +0000974 if (len >= 2) {
975 if ((in[0] == 0xFE) && (in[1] == 0xFF))
976 return(XML_CHAR_ENCODING_UTF16BE);
977 if ((in[0] == 0xFF) && (in[1] == 0xFE))
978 return(XML_CHAR_ENCODING_UTF16LE);
979 }
980 return(XML_CHAR_ENCODING_NONE);
981}
982
983/**
984 * xmlCleanupEncodingAliases:
985 *
986 * Unregisters all aliases
987 */
988void
989xmlCleanupEncodingAliases(void) {
990 int i;
991
992 if (xmlCharEncodingAliases == NULL)
993 return;
994
995 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
996 if (xmlCharEncodingAliases[i].name != NULL)
997 xmlFree((char *) xmlCharEncodingAliases[i].name);
998 if (xmlCharEncodingAliases[i].alias != NULL)
999 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1000 }
1001 xmlCharEncodingAliasesNb = 0;
1002 xmlCharEncodingAliasesMax = 0;
1003 xmlFree(xmlCharEncodingAliases);
Daniel Veillard73c6e532002-01-08 13:15:33 +00001004 xmlCharEncodingAliases = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001005}
1006
1007/**
1008 * xmlGetEncodingAlias:
1009 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1010 *
1011 * Lookup an encoding name for the given alias.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001012 *
William M. Brackf9415e42003-11-28 09:39:10 +00001013 * Returns NULL if not found, otherwise the original name
Owen Taylor3473f882001-02-23 17:55:21 +00001014 */
1015const char *
1016xmlGetEncodingAlias(const char *alias) {
1017 int i;
1018 char upper[100];
1019
1020 if (alias == NULL)
1021 return(NULL);
1022
1023 if (xmlCharEncodingAliases == NULL)
1024 return(NULL);
1025
1026 for (i = 0;i < 99;i++) {
1027 upper[i] = toupper(alias[i]);
1028 if (upper[i] == 0) break;
1029 }
1030 upper[i] = 0;
1031
1032 /*
1033 * Walk down the list looking for a definition of the alias
1034 */
1035 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1036 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1037 return(xmlCharEncodingAliases[i].name);
1038 }
1039 }
1040 return(NULL);
1041}
1042
1043/**
1044 * xmlAddEncodingAlias:
1045 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1046 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1047 *
William M. Brackf9415e42003-11-28 09:39:10 +00001048 * Registers an alias @alias for an encoding named @name. Existing alias
Owen Taylor3473f882001-02-23 17:55:21 +00001049 * will be overwritten.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001050 *
Owen Taylor3473f882001-02-23 17:55:21 +00001051 * Returns 0 in case of success, -1 in case of error
1052 */
1053int
1054xmlAddEncodingAlias(const char *name, const char *alias) {
1055 int i;
1056 char upper[100];
1057
1058 if ((name == NULL) || (alias == NULL))
1059 return(-1);
1060
1061 for (i = 0;i < 99;i++) {
1062 upper[i] = toupper(alias[i]);
1063 if (upper[i] == 0) break;
1064 }
1065 upper[i] = 0;
1066
1067 if (xmlCharEncodingAliases == NULL) {
1068 xmlCharEncodingAliasesNb = 0;
1069 xmlCharEncodingAliasesMax = 20;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001070 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001071 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1072 if (xmlCharEncodingAliases == NULL)
1073 return(-1);
1074 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1075 xmlCharEncodingAliasesMax *= 2;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001076 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
Owen Taylor3473f882001-02-23 17:55:21 +00001077 xmlRealloc(xmlCharEncodingAliases,
1078 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1079 }
1080 /*
1081 * Walk down the list looking for a definition of the alias
1082 */
1083 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1084 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1085 /*
1086 * Replace the definition.
1087 */
1088 xmlFree((char *) xmlCharEncodingAliases[i].name);
1089 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1090 return(0);
1091 }
1092 }
1093 /*
1094 * Add the definition
1095 */
1096 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1097 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1098 xmlCharEncodingAliasesNb++;
1099 return(0);
1100}
1101
1102/**
1103 * xmlDelEncodingAlias:
1104 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1105 *
1106 * Unregisters an encoding alias @alias
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001107 *
Owen Taylor3473f882001-02-23 17:55:21 +00001108 * Returns 0 in case of success, -1 in case of error
1109 */
1110int
1111xmlDelEncodingAlias(const char *alias) {
1112 int i;
1113
1114 if (alias == NULL)
1115 return(-1);
1116
1117 if (xmlCharEncodingAliases == NULL)
1118 return(-1);
1119 /*
1120 * Walk down the list looking for a definition of the alias
1121 */
1122 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1123 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1124 xmlFree((char *) xmlCharEncodingAliases[i].name);
1125 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1126 xmlCharEncodingAliasesNb--;
1127 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1128 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1129 return(0);
1130 }
1131 }
1132 return(-1);
1133}
1134
1135/**
1136 * xmlParseCharEncoding:
1137 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1138 *
William M. Brackf9415e42003-11-28 09:39:10 +00001139 * Compare the string to the encoding schemes already known. Note
Owen Taylor3473f882001-02-23 17:55:21 +00001140 * that the comparison is case insensitive accordingly to the section
1141 * [XML] 4.3.3 Character Encoding in Entities.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001142 *
Owen Taylor3473f882001-02-23 17:55:21 +00001143 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1144 * if not recognized.
1145 */
1146xmlCharEncoding
1147xmlParseCharEncoding(const char* name)
1148{
1149 const char *alias;
1150 char upper[500];
1151 int i;
1152
1153 if (name == NULL)
1154 return(XML_CHAR_ENCODING_NONE);
1155
1156 /*
1157 * Do the alias resolution
1158 */
1159 alias = xmlGetEncodingAlias(name);
1160 if (alias != NULL)
1161 name = alias;
1162
1163 for (i = 0;i < 499;i++) {
1164 upper[i] = toupper(name[i]);
1165 if (upper[i] == 0) break;
1166 }
1167 upper[i] = 0;
1168
1169 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1170 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1171 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1172
1173 /*
1174 * NOTE: if we were able to parse this, the endianness of UTF16 is
1175 * already found and in use
1176 */
1177 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1178 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001179
Owen Taylor3473f882001-02-23 17:55:21 +00001180 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1181 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1183
1184 /*
1185 * NOTE: if we were able to parse this, the endianness of UCS4 is
1186 * already found and in use
1187 */
1188 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1189 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1191
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001192
Owen Taylor3473f882001-02-23 17:55:21 +00001193 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1194 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1195 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1196
1197 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1198 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1199 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1200
1201 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1202 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1203 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1204 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1205 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1206 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1207 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1208
1209 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1210 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1211 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1212
1213#ifdef DEBUG_ENCODING
1214 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1215#endif
1216 return(XML_CHAR_ENCODING_ERROR);
1217}
1218
1219/**
1220 * xmlGetCharEncodingName:
1221 * @enc: the encoding
1222 *
1223 * The "canonical" name for XML encoding.
1224 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1225 * Section 4.3.3 Character Encoding in Entities
1226 *
1227 * Returns the canonical name for the given encoding
1228 */
1229
1230const char*
1231xmlGetCharEncodingName(xmlCharEncoding enc) {
1232 switch (enc) {
1233 case XML_CHAR_ENCODING_ERROR:
1234 return(NULL);
1235 case XML_CHAR_ENCODING_NONE:
1236 return(NULL);
1237 case XML_CHAR_ENCODING_UTF8:
1238 return("UTF-8");
1239 case XML_CHAR_ENCODING_UTF16LE:
1240 return("UTF-16");
1241 case XML_CHAR_ENCODING_UTF16BE:
1242 return("UTF-16");
1243 case XML_CHAR_ENCODING_EBCDIC:
1244 return("EBCDIC");
1245 case XML_CHAR_ENCODING_UCS4LE:
1246 return("ISO-10646-UCS-4");
1247 case XML_CHAR_ENCODING_UCS4BE:
1248 return("ISO-10646-UCS-4");
1249 case XML_CHAR_ENCODING_UCS4_2143:
1250 return("ISO-10646-UCS-4");
1251 case XML_CHAR_ENCODING_UCS4_3412:
1252 return("ISO-10646-UCS-4");
1253 case XML_CHAR_ENCODING_UCS2:
1254 return("ISO-10646-UCS-2");
1255 case XML_CHAR_ENCODING_8859_1:
1256 return("ISO-8859-1");
1257 case XML_CHAR_ENCODING_8859_2:
1258 return("ISO-8859-2");
1259 case XML_CHAR_ENCODING_8859_3:
1260 return("ISO-8859-3");
1261 case XML_CHAR_ENCODING_8859_4:
1262 return("ISO-8859-4");
1263 case XML_CHAR_ENCODING_8859_5:
1264 return("ISO-8859-5");
1265 case XML_CHAR_ENCODING_8859_6:
1266 return("ISO-8859-6");
1267 case XML_CHAR_ENCODING_8859_7:
1268 return("ISO-8859-7");
1269 case XML_CHAR_ENCODING_8859_8:
1270 return("ISO-8859-8");
1271 case XML_CHAR_ENCODING_8859_9:
1272 return("ISO-8859-9");
1273 case XML_CHAR_ENCODING_2022_JP:
1274 return("ISO-2022-JP");
1275 case XML_CHAR_ENCODING_SHIFT_JIS:
1276 return("Shift-JIS");
1277 case XML_CHAR_ENCODING_EUC_JP:
1278 return("EUC-JP");
1279 case XML_CHAR_ENCODING_ASCII:
1280 return(NULL);
1281 }
1282 return(NULL);
1283}
1284
Daniel Veillard97ac1312001-05-30 19:14:17 +00001285/************************************************************************
1286 * *
1287 * Char encoding handlers *
1288 * *
1289 ************************************************************************/
1290
Owen Taylor3473f882001-02-23 17:55:21 +00001291
/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
/*
 * Table of the registered handlers, allocated with MAX_ENCODING_HANDLERS
 * slots by xmlInitCharEncodingHandlers(); NULL until then.
 */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of slots of the table currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1303
1304/**
1305 * xmlNewCharEncodingHandler:
1306 * @name: the encoding name, in UTF-8 format (ASCII actually)
1307 * @input: the xmlCharEncodingInputFunc to read that encoding
1308 * @output: the xmlCharEncodingOutputFunc to write that encoding
1309 *
1310 * Create and registers an xmlCharEncodingHandler.
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001311 *
Owen Taylor3473f882001-02-23 17:55:21 +00001312 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1313 */
Daniel Veillard6f46f6c2002-08-01 12:22:24 +00001314xmlCharEncodingHandlerPtr
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001315xmlNewCharEncodingHandler(const char *name,
Owen Taylor3473f882001-02-23 17:55:21 +00001316 xmlCharEncodingInputFunc input,
1317 xmlCharEncodingOutputFunc output) {
1318 xmlCharEncodingHandlerPtr handler;
1319 const char *alias;
1320 char upper[500];
1321 int i;
Daniel Veillard24505b02005-07-28 23:49:35 +00001322 char *up = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00001323
1324 /*
1325 * Do the alias resolution
1326 */
1327 alias = xmlGetEncodingAlias(name);
1328 if (alias != NULL)
1329 name = alias;
1330
1331 /*
1332 * Keep only the uppercase version of the encoding.
1333 */
1334 if (name == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001335 xmlEncodingErr(XML_I18N_NO_NAME,
1336 "xmlNewCharEncodingHandler : no name !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001337 return(NULL);
1338 }
1339 for (i = 0;i < 499;i++) {
1340 upper[i] = toupper(name[i]);
1341 if (upper[i] == 0) break;
1342 }
1343 upper[i] = 0;
1344 up = xmlMemStrdup(upper);
1345 if (up == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001346 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001347 return(NULL);
1348 }
1349
1350 /*
1351 * allocate and fill-up an handler block.
1352 */
1353 handler = (xmlCharEncodingHandlerPtr)
1354 xmlMalloc(sizeof(xmlCharEncodingHandler));
1355 if (handler == NULL) {
William M. Bracka3215c72004-07-31 16:24:01 +00001356 xmlFree(up);
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001357 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001358 return(NULL);
1359 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001360 memset(handler, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001361 handler->input = input;
1362 handler->output = output;
1363 handler->name = up;
1364
1365#ifdef LIBXML_ICONV_ENABLED
1366 handler->iconv_in = NULL;
1367 handler->iconv_out = NULL;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001368#endif
1369#ifdef LIBXML_ICU_ENABLED
1370 handler->uconv_in = NULL;
1371 handler->uconv_out = NULL;
1372#endif
Owen Taylor3473f882001-02-23 17:55:21 +00001373
1374 /*
1375 * registers and returns the handler.
1376 */
1377 xmlRegisterCharEncodingHandler(handler);
1378#ifdef DEBUG_ENCODING
1379 xmlGenericError(xmlGenericErrorContext,
1380 "Registered encoding handler for %s\n", name);
1381#endif
1382 return(handler);
1383}
1384
1385/**
1386 * xmlInitCharEncodingHandlers:
1387 *
1388 * Initialize the char encoding support, it registers the default
1389 * encoding supported.
1390 * NOTE: while public, this function usually doesn't need to be called
1391 * in normal processing.
1392 */
1393void
1394xmlInitCharEncodingHandlers(void) {
1395 unsigned short int tst = 0x1234;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001396 unsigned char *ptr = (unsigned char *) &tst;
Owen Taylor3473f882001-02-23 17:55:21 +00001397
1398 if (handlers != NULL) return;
1399
1400 handlers = (xmlCharEncodingHandlerPtr *)
1401 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1402
1403 if (*ptr == 0x12) xmlLittleEndian = 0;
1404 else if (*ptr == 0x34) xmlLittleEndian = 1;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001405 else {
1406 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1407 "Odd problem at endianness detection\n", NULL);
1408 }
Owen Taylor3473f882001-02-23 17:55:21 +00001409
1410 if (handlers == NULL) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001411 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001412 return;
1413 }
Daniel Veillard81601f92003-01-14 13:42:37 +00001414 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001415#ifdef LIBXML_OUTPUT_ENABLED
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001416 xmlUTF16LEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001417 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001418 xmlUTF16BEHandler =
Owen Taylor3473f882001-02-23 17:55:21 +00001419 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
William M. Brackf9415e42003-11-28 09:39:10 +00001420 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
Owen Taylor3473f882001-02-23 17:55:21 +00001421 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1422 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard20042422001-05-31 18:22:04 +00001423 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor3473f882001-02-23 17:55:21 +00001424#ifdef LIBXML_HTML_ENABLED
1425 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1426#endif
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001427#else
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001428 xmlUTF16LEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001429 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001430 xmlUTF16BEHandler =
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001431 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
William M. Brackf9415e42003-11-28 09:39:10 +00001432 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001433 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1434 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1435 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1436#endif /* LIBXML_OUTPUT_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001437#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00001438#ifdef LIBXML_ISO8859X_ENABLED
1439 xmlRegisterCharEncodingHandlersISO8859x ();
1440#endif
1441#endif
1442
Owen Taylor3473f882001-02-23 17:55:21 +00001443}
1444
1445/**
1446 * xmlCleanupCharEncodingHandlers:
1447 *
1448 * Cleanup the memory allocated for the char encoding support, it
1449 * unregisters all the encoding handlers and the aliases.
1450 */
1451void
1452xmlCleanupCharEncodingHandlers(void) {
1453 xmlCleanupEncodingAliases();
1454
1455 if (handlers == NULL) return;
1456
1457 for (;nbCharEncodingHandler > 0;) {
1458 nbCharEncodingHandler--;
1459 if (handlers[nbCharEncodingHandler] != NULL) {
1460 if (handlers[nbCharEncodingHandler]->name != NULL)
1461 xmlFree(handlers[nbCharEncodingHandler]->name);
1462 xmlFree(handlers[nbCharEncodingHandler]);
1463 }
1464 }
1465 xmlFree(handlers);
1466 handlers = NULL;
1467 nbCharEncodingHandler = 0;
1468 xmlDefaultCharEncodingHandler = NULL;
1469}
1470
1471/**
1472 * xmlRegisterCharEncodingHandler:
1473 * @handler: the xmlCharEncodingHandlerPtr handler block
1474 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001475 * Register the char encoding handler, surprising, isn't it ?
Owen Taylor3473f882001-02-23 17:55:21 +00001476 */
1477void
1478xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1479 if (handlers == NULL) xmlInitCharEncodingHandlers();
Daniel Veillard76d36452009-09-07 11:19:33 +02001480 if ((handler == NULL) || (handlers == NULL)) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001481 xmlEncodingErr(XML_I18N_NO_HANDLER,
1482 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001483 return;
1484 }
1485
1486 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001487 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1488 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1489 "MAX_ENCODING_HANDLERS");
Owen Taylor3473f882001-02-23 17:55:21 +00001490 return;
1491 }
1492 handlers[nbCharEncodingHandler++] = handler;
1493}
1494
1495/**
1496 * xmlGetCharEncodingHandler:
1497 * @enc: an xmlCharEncoding value.
1498 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001499 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001500 *
1501 * Returns the handler or NULL if not found
1502 */
1503xmlCharEncodingHandlerPtr
1504xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1505 xmlCharEncodingHandlerPtr handler;
1506
1507 if (handlers == NULL) xmlInitCharEncodingHandlers();
1508 switch (enc) {
1509 case XML_CHAR_ENCODING_ERROR:
1510 return(NULL);
1511 case XML_CHAR_ENCODING_NONE:
1512 return(NULL);
1513 case XML_CHAR_ENCODING_UTF8:
1514 return(NULL);
1515 case XML_CHAR_ENCODING_UTF16LE:
1516 return(xmlUTF16LEHandler);
1517 case XML_CHAR_ENCODING_UTF16BE:
1518 return(xmlUTF16BEHandler);
1519 case XML_CHAR_ENCODING_EBCDIC:
1520 handler = xmlFindCharEncodingHandler("EBCDIC");
1521 if (handler != NULL) return(handler);
1522 handler = xmlFindCharEncodingHandler("ebcdic");
1523 if (handler != NULL) return(handler);
Martin Köglerc78988a2009-08-24 16:47:48 +02001524 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1525 if (handler != NULL) return(handler);
Petr Sumbera6f49c732012-12-12 15:41:30 +08001526 handler = xmlFindCharEncodingHandler("IBM-037");
1527 if (handler != NULL) return(handler);
Owen Taylor3473f882001-02-23 17:55:21 +00001528 break;
1529 case XML_CHAR_ENCODING_UCS4BE:
1530 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1531 if (handler != NULL) return(handler);
1532 handler = xmlFindCharEncodingHandler("UCS-4");
1533 if (handler != NULL) return(handler);
1534 handler = xmlFindCharEncodingHandler("UCS4");
1535 if (handler != NULL) return(handler);
1536 break;
1537 case XML_CHAR_ENCODING_UCS4LE:
1538 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1539 if (handler != NULL) return(handler);
1540 handler = xmlFindCharEncodingHandler("UCS-4");
1541 if (handler != NULL) return(handler);
1542 handler = xmlFindCharEncodingHandler("UCS4");
1543 if (handler != NULL) return(handler);
1544 break;
1545 case XML_CHAR_ENCODING_UCS4_2143:
1546 break;
1547 case XML_CHAR_ENCODING_UCS4_3412:
1548 break;
1549 case XML_CHAR_ENCODING_UCS2:
1550 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1551 if (handler != NULL) return(handler);
1552 handler = xmlFindCharEncodingHandler("UCS-2");
1553 if (handler != NULL) return(handler);
1554 handler = xmlFindCharEncodingHandler("UCS2");
1555 if (handler != NULL) return(handler);
1556 break;
1557
1558 /*
1559 * We used to keep ISO Latin encodings native in the
1560 * generated data. This led to so many problems that
1561 * this has been removed. One can still change this
1562 * back by registering no-ops encoders for those
1563 */
1564 case XML_CHAR_ENCODING_8859_1:
1565 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1566 if (handler != NULL) return(handler);
1567 break;
1568 case XML_CHAR_ENCODING_8859_2:
1569 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1570 if (handler != NULL) return(handler);
1571 break;
1572 case XML_CHAR_ENCODING_8859_3:
1573 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1574 if (handler != NULL) return(handler);
1575 break;
1576 case XML_CHAR_ENCODING_8859_4:
1577 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1578 if (handler != NULL) return(handler);
1579 break;
1580 case XML_CHAR_ENCODING_8859_5:
1581 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1582 if (handler != NULL) return(handler);
1583 break;
1584 case XML_CHAR_ENCODING_8859_6:
1585 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1586 if (handler != NULL) return(handler);
1587 break;
1588 case XML_CHAR_ENCODING_8859_7:
1589 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1590 if (handler != NULL) return(handler);
1591 break;
1592 case XML_CHAR_ENCODING_8859_8:
1593 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1594 if (handler != NULL) return(handler);
1595 break;
1596 case XML_CHAR_ENCODING_8859_9:
1597 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1598 if (handler != NULL) return(handler);
1599 break;
1600
1601
1602 case XML_CHAR_ENCODING_2022_JP:
1603 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1604 if (handler != NULL) return(handler);
1605 break;
1606 case XML_CHAR_ENCODING_SHIFT_JIS:
1607 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1608 if (handler != NULL) return(handler);
1609 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1610 if (handler != NULL) return(handler);
1611 handler = xmlFindCharEncodingHandler("Shift_JIS");
1612 if (handler != NULL) return(handler);
1613 break;
1614 case XML_CHAR_ENCODING_EUC_JP:
1615 handler = xmlFindCharEncodingHandler("EUC-JP");
1616 if (handler != NULL) return(handler);
1617 break;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001618 default:
Owen Taylor3473f882001-02-23 17:55:21 +00001619 break;
1620 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001621
Owen Taylor3473f882001-02-23 17:55:21 +00001622#ifdef DEBUG_ENCODING
1623 xmlGenericError(xmlGenericErrorContext,
1624 "No handler found for encoding %d\n", enc);
1625#endif
1626 return(NULL);
1627}
1628
1629/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001630 * xmlFindCharEncodingHandler:
1631 * @name: a string describing the char encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001632 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001633 * Search in the registered set the handler able to read/write that encoding.
Owen Taylor3473f882001-02-23 17:55:21 +00001634 *
1635 * Returns the handler or NULL if not found
1636 */
1637xmlCharEncodingHandlerPtr
1638xmlFindCharEncodingHandler(const char *name) {
1639 const char *nalias;
1640 const char *norig;
1641 xmlCharEncoding alias;
1642#ifdef LIBXML_ICONV_ENABLED
1643 xmlCharEncodingHandlerPtr enc;
1644 iconv_t icv_in, icv_out;
1645#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001646#ifdef LIBXML_ICU_ENABLED
1647 xmlCharEncodingHandlerPtr encu;
1648 uconv_t *ucv_in, *ucv_out;
1649#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001650 char upper[100];
1651 int i;
1652
1653 if (handlers == NULL) xmlInitCharEncodingHandlers();
1654 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1655 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1656
1657 /*
1658 * Do the alias resolution
1659 */
1660 norig = name;
1661 nalias = xmlGetEncodingAlias(name);
1662 if (nalias != NULL)
1663 name = nalias;
1664
1665 /*
1666 * Check first for directly registered encoding names
1667 */
1668 for (i = 0;i < 99;i++) {
1669 upper[i] = toupper(name[i]);
1670 if (upper[i] == 0) break;
1671 }
1672 upper[i] = 0;
1673
Daniel Veillardd44b9362009-09-07 12:15:08 +02001674 if (handlers != NULL) {
1675 for (i = 0;i < nbCharEncodingHandler; i++) {
1676 if (!strcmp(upper, handlers[i]->name)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001677#ifdef DEBUG_ENCODING
Daniel Veillardd44b9362009-09-07 12:15:08 +02001678 xmlGenericError(xmlGenericErrorContext,
1679 "Found registered handler for encoding %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001680#endif
Daniel Veillardd44b9362009-09-07 12:15:08 +02001681 return(handlers[i]);
1682 }
1683 }
1684 }
Owen Taylor3473f882001-02-23 17:55:21 +00001685
1686#ifdef LIBXML_ICONV_ENABLED
1687 /* check whether iconv can handle this */
1688 icv_in = iconv_open("UTF-8", name);
1689 icv_out = iconv_open(name, "UTF-8");
Daniel Veillard28aac0b2006-10-16 08:31:18 +00001690 if (icv_in == (iconv_t) -1) {
1691 icv_in = iconv_open("UTF-8", upper);
1692 }
1693 if (icv_out == (iconv_t) -1) {
1694 icv_out = iconv_open(upper, "UTF-8");
1695 }
Owen Taylor3473f882001-02-23 17:55:21 +00001696 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1697 enc = (xmlCharEncodingHandlerPtr)
1698 xmlMalloc(sizeof(xmlCharEncodingHandler));
1699 if (enc == NULL) {
1700 iconv_close(icv_in);
1701 iconv_close(icv_out);
1702 return(NULL);
1703 }
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001704 memset(enc, 0, sizeof(xmlCharEncodingHandler));
Owen Taylor3473f882001-02-23 17:55:21 +00001705 enc->name = xmlMemStrdup(name);
1706 enc->input = NULL;
1707 enc->output = NULL;
1708 enc->iconv_in = icv_in;
1709 enc->iconv_out = icv_out;
1710#ifdef DEBUG_ENCODING
1711 xmlGenericError(xmlGenericErrorContext,
1712 "Found iconv handler for encoding %s\n", name);
1713#endif
1714 return enc;
1715 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00001716 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00001717 "iconv : problems with filters for '%s'\n", name);
1718 }
1719#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001720#ifdef LIBXML_ICU_ENABLED
1721 /* check whether icu can handle this */
1722 ucv_in = openIcuConverter(name, 1);
1723 ucv_out = openIcuConverter(name, 0);
1724 if (ucv_in != NULL && ucv_out != NULL) {
1725 encu = (xmlCharEncodingHandlerPtr)
1726 xmlMalloc(sizeof(xmlCharEncodingHandler));
1727 if (encu == NULL) {
1728 closeIcuConverter(ucv_in);
1729 closeIcuConverter(ucv_out);
1730 return(NULL);
1731 }
1732 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1733 encu->name = xmlMemStrdup(name);
1734 encu->input = NULL;
1735 encu->output = NULL;
1736 encu->uconv_in = ucv_in;
1737 encu->uconv_out = ucv_out;
1738#ifdef DEBUG_ENCODING
1739 xmlGenericError(xmlGenericErrorContext,
1740 "Found ICU converter handler for encoding %s\n", name);
1741#endif
1742 return encu;
1743 } else if (ucv_in != NULL || ucv_out != NULL) {
1744 closeIcuConverter(ucv_in);
1745 closeIcuConverter(ucv_out);
1746 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1747 "ICU converter : problems with filters for '%s'\n", name);
1748 }
1749#endif /* LIBXML_ICU_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00001750
1751#ifdef DEBUG_ENCODING
1752 xmlGenericError(xmlGenericErrorContext,
1753 "No handler found for encoding %s\n", name);
1754#endif
1755
1756 /*
1757 * Fallback using the canonical names
1758 */
1759 alias = xmlParseCharEncoding(norig);
1760 if (alias != XML_CHAR_ENCODING_ERROR) {
1761 const char* canon;
1762 canon = xmlGetCharEncodingName(alias);
1763 if ((canon != NULL) && (strcmp(name, canon))) {
1764 return(xmlFindCharEncodingHandler(canon));
1765 }
1766 }
1767
William M. Brackf9415e42003-11-28 09:39:10 +00001768 /* If "none of the above", give up */
Owen Taylor3473f882001-02-23 17:55:21 +00001769 return(NULL);
1770}
1771
Daniel Veillard97ac1312001-05-30 19:14:17 +00001772/************************************************************************
1773 * *
1774 * ICONV based generic conversion functions *
1775 * *
1776 ************************************************************************/
1777
Owen Taylor3473f882001-02-23 17:55:21 +00001778#ifdef LIBXML_ICONV_ENABLED
1779/**
1780 * xmlIconvWrapper:
1781 * @cd: iconv converter data structure
1782 * @out: a pointer to an array of bytes to store the result
1783 * @outlen: the length of @out
1784 * @in: a pointer to an array of ISO Latin 1 chars
1785 * @inlen: the length of @in
1786 *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001787 * Returns 0 if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001788 * -1 by lack of space, or
1789 * -2 if the transcoding fails (for *in is not valid utf8 string or
1790 * the result of transformation can't fit into the encoding we want), or
1791 * -3 if there the last byte can't form a single output char.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001792 *
Owen Taylor3473f882001-02-23 17:55:21 +00001793 * The value of @inlen after return is the number of octets consumed
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001794 * as the return value is positive, else unpredictable.
Owen Taylor3473f882001-02-23 17:55:21 +00001795 * The value of @outlen after return is the number of ocetes consumed.
1796 */
1797static int
Daniel Veillardce682bc2004-11-05 17:22:25 +00001798xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1799 const unsigned char *in, int *inlen) {
1800 size_t icv_inlen, icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001801 const char *icv_in = (const char *) in;
1802 char *icv_out = (char *) out;
1803 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001804
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001805 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1806 if (outlen != NULL) *outlen = 0;
Daniel Veillardce682bc2004-11-05 17:22:25 +00001807 return(-1);
Daniel Veillard01ca83c2004-11-06 13:26:59 +00001808 }
Daniel Veillardce682bc2004-11-05 17:22:25 +00001809 icv_inlen = *inlen;
1810 icv_outlen = *outlen;
Daniel Veillard8e1a46d2008-02-15 07:47:26 +00001811 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
Daniel Veillard2728f842006-03-09 16:49:24 +00001812 *inlen -= icv_inlen;
1813 *outlen -= icv_outlen;
Daniel Veillard9403a042001-05-28 11:00:53 +00001814 if ((icv_inlen != 0) || (ret == -1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001815#ifdef EILSEQ
Daniel Veillard9403a042001-05-28 11:00:53 +00001816 if (errno == EILSEQ) {
1817 return -2;
1818 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001819#endif
1820#ifdef E2BIG
Daniel Veillard9403a042001-05-28 11:00:53 +00001821 if (errno == E2BIG) {
1822 return -1;
1823 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001824#endif
1825#ifdef EINVAL
Daniel Veillard9403a042001-05-28 11:00:53 +00001826 if (errno == EINVAL) {
1827 return -3;
1828 } else
Owen Taylor3473f882001-02-23 17:55:21 +00001829#endif
Daniel Veillard9403a042001-05-28 11:00:53 +00001830 {
1831 return -3;
1832 }
1833 }
1834 return 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001835}
1836#endif /* LIBXML_ICONV_ENABLED */
1837
Daniel Veillard97ac1312001-05-30 19:14:17 +00001838/************************************************************************
1839 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001840 * ICU based generic conversion functions *
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001841 * *
1842 ************************************************************************/
1843
1844#ifdef LIBXML_ICU_ENABLED
1845/**
1846 * xmlUconvWrapper:
1847 * @cd: ICU uconverter data structure
1848 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1849 * @out: a pointer to an array of bytes to store the result
1850 * @outlen: the length of @out
1851 * @in: a pointer to an array of ISO Latin 1 chars
1852 * @inlen: the length of @in
1853 *
1854 * Returns 0 if success, or
1855 * -1 by lack of space, or
1856 * -2 if the transcoding fails (for *in is not valid utf8 string or
1857 * the result of transformation can't fit into the encoding we want), or
1858 * -3 if there the last byte can't form a single output char.
1859 *
1860 * The value of @inlen after return is the number of octets consumed
1861 * as the return value is positive, else unpredictable.
1862 * The value of @outlen after return is the number of ocetes consumed.
1863 */
1864static int
1865xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1866 const unsigned char *in, int *inlen) {
1867 const char *ucv_in = (const char *) in;
1868 char *ucv_out = (char *) out;
1869 UErrorCode err = U_ZERO_ERROR;
1870
1871 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1872 if (outlen != NULL) *outlen = 0;
1873 return(-1);
1874 }
1875
1876 /*
1877 * TODO(jungshik)
1878 * 1. is ucnv_convert(To|From)Algorithmic better?
1879 * 2. had we better use an explicit pivot buffer?
1880 * 3. error returned comes from 'fromUnicode' only even
1881 * when toUnicode is true !
1882 */
1883 if (toUnicode) {
1884 /* encoding => UTF-16 => UTF-8 */
1885 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1886 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1887 0, TRUE, &err);
1888 } else {
1889 /* UTF-8 => UTF-16 => encoding */
1890 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1891 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1892 0, TRUE, &err);
1893 }
1894 *inlen = ucv_in - (const char*) in;
1895 *outlen = ucv_out - (char *) out;
1896 if (U_SUCCESS(err))
1897 return 0;
1898 if (err == U_BUFFER_OVERFLOW_ERROR)
1899 return -1;
1900 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1901 return -2;
1902 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1903 return -3;
1904}
1905#endif /* LIBXML_ICU_ENABLED */
1906
1907/************************************************************************
1908 * *
Daniel Veillard97ac1312001-05-30 19:14:17 +00001909 * The real API used by libxml for on-the-fly conversion *
1910 * *
1911 ************************************************************************/
1912
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02001913static int
1914xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1915 int *outlen, const unsigned char *in, int *inlen) {
1916 int ret;
1917
1918 if (handler->input != NULL) {
1919 ret = handler->input(out, outlen, in, inlen);
1920 }
1921#ifdef LIBXML_ICONV_ENABLED
1922 else if (handler->iconv_in != NULL) {
1923 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1924 }
1925#endif /* LIBXML_ICONV_ENABLED */
1926#ifdef LIBXML_ICU_ENABLED
1927 else if (handler->uconv_in != NULL) {
1928 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
1929 }
1930#endif /* LIBXML_ICU_ENABLED */
1931 else {
1932 *outlen = 0;
1933 *inlen = 0;
1934 ret = -2;
1935 }
1936
1937 return(ret);
1938}
1939
1940/* Returns -4 if no output function was found. */
1941static int
1942xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1943 int *outlen, const unsigned char *in, int *inlen) {
1944 int ret;
1945
1946 if (handler->output != NULL) {
1947 ret = handler->output(out, outlen, in, inlen);
1948 }
1949#ifdef LIBXML_ICONV_ENABLED
1950 else if (handler->iconv_out != NULL) {
1951 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1952 }
1953#endif /* LIBXML_ICONV_ENABLED */
1954#ifdef LIBXML_ICU_ENABLED
1955 else if (handler->uconv_out != NULL) {
1956 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
1957 }
1958#endif /* LIBXML_ICU_ENABLED */
1959 else {
1960 *outlen = 0;
1961 *inlen = 0;
1962 ret = -4;
1963 }
1964
1965 return(ret);
1966}
1967
Owen Taylor3473f882001-02-23 17:55:21 +00001968/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001969 * xmlCharEncFirstLineInt:
Owen Taylor3473f882001-02-23 17:55:21 +00001970 * @handler: char enconding transformation data structure
1971 * @out: an xmlBuffer for the output.
1972 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001973 * @len: number of bytes to convert for the first line, or -1
1974 *
Owen Taylor3473f882001-02-23 17:55:21 +00001975 * Front-end for the encoding handler input function, but handle only
1976 * the very first line, i.e. limit itself to 45 chars.
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001977 *
1978 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00001979 * -1 general error
1980 * -2 if the transcoding fails (for *in is not valid utf8 string or
1981 * the result of transformation can't fit into the encoding we want), or
1982 */
1983int
Daniel Veillard7e385bd2009-08-26 11:38:49 +02001984xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1985 xmlBufferPtr in, int len) {
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02001986 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001987 int written;
1988 int toconv;
1989
1990 if (handler == NULL) return(-1);
1991 if (out == NULL) return(-1);
1992 if (in == NULL) return(-1);
1993
William M. Brack38d452a2007-05-22 16:00:06 +00001994 /* calculate space available */
Daniel Veillard69f04562011-08-19 11:05:04 +08001995 written = out->size - out->use - 1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00001996 toconv = in->use;
Owen Taylor3473f882001-02-23 17:55:21 +00001997 /*
1998 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1999 * 45 chars should be sufficient to reach the end of the encoding
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002000 * declaration without going too far inside the document content.
Daniel Veillard57c9db02008-03-06 14:37:10 +00002001 * on UTF-16 this means 90bytes, on UCS4 this means 180
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002002 * The actual value depending on guessed encoding is passed as @len
2003 * if provided
Owen Taylor3473f882001-02-23 17:55:21 +00002004 */
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002005 if (len >= 0) {
2006 if (toconv > len)
2007 toconv = len;
2008 } else {
2009 if (toconv > 180)
2010 toconv = 180;
2011 }
William M. Brack38d452a2007-05-22 16:00:06 +00002012 if (toconv * 2 >= written) {
Daniel Veillard18d0db22012-07-13 19:51:15 +08002013 xmlBufferGrow(out, toconv * 2);
William M. Brack38d452a2007-05-22 16:00:06 +00002014 written = out->size - out->use - 1;
2015 }
Owen Taylor3473f882001-02-23 17:55:21 +00002016
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002017 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2018 in->content, &toconv);
2019 xmlBufferShrink(in, toconv);
2020 out->use += written;
2021 out->content[out->use] = 0;
2022 if (ret == -1) ret = -3;
2023
Owen Taylor3473f882001-02-23 17:55:21 +00002024#ifdef DEBUG_ENCODING
2025 switch (ret) {
2026 case 0:
2027 xmlGenericError(xmlGenericErrorContext,
2028 "converted %d bytes to %d bytes of input\n",
2029 toconv, written);
2030 break;
2031 case -1:
2032 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2033 toconv, written, in->use);
2034 break;
2035 case -2:
2036 xmlGenericError(xmlGenericErrorContext,
2037 "input conversion failed due to input error\n");
2038 break;
2039 case -3:
2040 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2041 toconv, written, in->use);
2042 break;
2043 default:
2044 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2045 }
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002046#endif /* DEBUG_ENCODING */
Owen Taylor3473f882001-02-23 17:55:21 +00002047 /*
2048 * Ignore when input buffer is not on a boundary
2049 */
2050 if (ret == -3) ret = 0;
2051 if (ret == -1) ret = 0;
2052 return(ret);
2053}
2054
2055/**
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002056 * xmlCharEncFirstLine:
2057 * @handler: char enconding transformation data structure
2058 * @out: an xmlBuffer for the output.
2059 * @in: an xmlBuffer for the input
2060 *
2061 * Front-end for the encoding handler input function, but handle only
2062 * the very first line, i.e. limit itself to 45 chars.
2063 *
2064 * Returns the number of byte written if success, or
2065 * -1 general error
2066 * -2 if the transcoding fails (for *in is not valid utf8 string or
2067 * the result of transformation can't fit into the encoding we want), or
2068 */
2069int
2070xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2071 xmlBufferPtr in) {
2072 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2073}
2074
2075/**
Daniel Veillard28cc42d2012-08-10 10:00:18 +08002076 * xmlCharEncFirstLineInput:
Daniel Veillard18d0db22012-07-13 19:51:15 +08002077 * @input: a parser input buffer
2078 * @len: number of bytes to convert for the first line, or -1
2079 *
2080 * Front-end for the encoding handler input function, but handle only
2081 * the very first line. Point is that this is based on autodetection
2082 * of the encoding and once that first line is converted we may find
2083 * out that a different decoder is needed to process the input.
2084 *
2085 * Returns the number of byte written if success, or
2086 * -1 general error
2087 * -2 if the transcoding fails (for *in is not valid utf8 string or
2088 * the result of transformation can't fit into the encoding we want), or
2089 */
2090int
2091xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2092{
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002093 int ret;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002094 size_t written;
2095 size_t toconv;
2096 int c_in;
2097 int c_out;
2098 xmlBufPtr in;
2099 xmlBufPtr out;
2100
2101 if ((input == NULL) || (input->encoder == NULL) ||
2102 (input->buffer == NULL) || (input->raw == NULL))
2103 return (-1);
2104 out = input->buffer;
2105 in = input->raw;
2106
2107 toconv = xmlBufUse(in);
2108 if (toconv == 0)
2109 return (0);
2110 written = xmlBufAvail(out) - 1; /* count '\0' */
2111 /*
2112 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2113 * 45 chars should be sufficient to reach the end of the encoding
2114 * declaration without going too far inside the document content.
2115 * on UTF-16 this means 90bytes, on UCS4 this means 180
2116 * The actual value depending on guessed encoding is passed as @len
2117 * if provided
2118 */
2119 if (len >= 0) {
2120 if (toconv > (unsigned int) len)
2121 toconv = len;
2122 } else {
2123 if (toconv > 180)
2124 toconv = 180;
2125 }
2126 if (toconv * 2 >= written) {
2127 xmlBufGrow(out, toconv * 2);
2128 written = xmlBufAvail(out) - 1;
2129 }
2130 if (written > 360)
2131 written = 360;
2132
2133 c_in = toconv;
2134 c_out = written;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002135 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2136 xmlBufContent(in), &c_in);
2137 xmlBufShrink(in, c_in);
2138 xmlBufAddLen(out, c_out);
2139 if (ret == -1)
2140 ret = -3;
2141
Daniel Veillard18d0db22012-07-13 19:51:15 +08002142 switch (ret) {
2143 case 0:
2144#ifdef DEBUG_ENCODING
2145 xmlGenericError(xmlGenericErrorContext,
2146 "converted %d bytes to %d bytes of input\n",
2147 c_in, c_out);
2148#endif
2149 break;
2150 case -1:
2151#ifdef DEBUG_ENCODING
2152 xmlGenericError(xmlGenericErrorContext,
2153 "converted %d bytes to %d bytes of input, %d left\n",
2154 c_in, c_out, (int)xmlBufUse(in));
2155#endif
2156 break;
2157 case -3:
2158#ifdef DEBUG_ENCODING
2159 xmlGenericError(xmlGenericErrorContext,
2160 "converted %d bytes to %d bytes of input, %d left\n",
2161 c_in, c_out, (int)xmlBufUse(in));
2162#endif
2163 break;
2164 case -2: {
2165 char buf[50];
2166 const xmlChar *content = xmlBufContent(in);
2167
2168 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2169 content[0], content[1],
2170 content[2], content[3]);
2171 buf[49] = 0;
2172 xmlEncodingErr(XML_I18N_CONV_FAILED,
2173 "input conversion failed due to input error, bytes %s\n",
2174 buf);
2175 }
2176 }
2177 /*
2178 * Ignore when input buffer is not on a boundary
2179 */
2180 if (ret == -3) ret = 0;
2181 if (ret == -1) ret = 0;
2182 return(ret);
2183}
2184
2185/**
2186 * xmlCharEncInput:
2187 * @input: a parser input buffer
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002188 * @flush: try to flush all the raw buffer
Daniel Veillard18d0db22012-07-13 19:51:15 +08002189 *
2190 * Generic front-end for the encoding handler on parser input
2191 *
2192 * Returns the number of byte written if success, or
2193 * -1 general error
2194 * -2 if the transcoding fails (for *in is not valid utf8 string or
2195 * the result of transformation can't fit into the encoding we want), or
2196 */
2197int
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002198xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
Daniel Veillard18d0db22012-07-13 19:51:15 +08002199{
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002200 int ret;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002201 size_t written;
2202 size_t toconv;
2203 int c_in;
2204 int c_out;
2205 xmlBufPtr in;
2206 xmlBufPtr out;
2207
2208 if ((input == NULL) || (input->encoder == NULL) ||
2209 (input->buffer == NULL) || (input->raw == NULL))
2210 return (-1);
2211 out = input->buffer;
2212 in = input->raw;
2213
2214 toconv = xmlBufUse(in);
2215 if (toconv == 0)
2216 return (0);
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002217 if ((toconv > 64 * 1024) && (flush == 0))
Daniel Veillard18d0db22012-07-13 19:51:15 +08002218 toconv = 64 * 1024;
2219 written = xmlBufAvail(out);
2220 if (written > 0)
2221 written--; /* count '\0' */
2222 if (toconv * 2 >= written) {
2223 xmlBufGrow(out, toconv * 2);
2224 written = xmlBufAvail(out);
2225 if (written > 0)
2226 written--; /* count '\0' */
2227 }
Daniel Veillardbf058dc2013-02-13 18:19:42 +08002228 if ((written > 128 * 1024) && (flush == 0))
Daniel Veillard18d0db22012-07-13 19:51:15 +08002229 written = 128 * 1024;
2230
2231 c_in = toconv;
2232 c_out = written;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002233 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2234 xmlBufContent(in), &c_in);
2235 xmlBufShrink(in, c_in);
2236 xmlBufAddLen(out, c_out);
2237 if (ret == -1)
2238 ret = -3;
2239
Daniel Veillard18d0db22012-07-13 19:51:15 +08002240 switch (ret) {
2241 case 0:
2242#ifdef DEBUG_ENCODING
2243 xmlGenericError(xmlGenericErrorContext,
2244 "converted %d bytes to %d bytes of input\n",
2245 c_in, c_out);
2246#endif
2247 break;
2248 case -1:
2249#ifdef DEBUG_ENCODING
2250 xmlGenericError(xmlGenericErrorContext,
2251 "converted %d bytes to %d bytes of input, %d left\n",
2252 c_in, c_out, (int)xmlBufUse(in));
2253#endif
2254 break;
2255 case -3:
2256#ifdef DEBUG_ENCODING
2257 xmlGenericError(xmlGenericErrorContext,
2258 "converted %d bytes to %d bytes of input, %d left\n",
2259 c_in, c_out, (int)xmlBufUse(in));
2260#endif
2261 break;
2262 case -2: {
2263 char buf[50];
2264 const xmlChar *content = xmlBufContent(in);
2265
2266 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2267 content[0], content[1],
2268 content[2], content[3]);
2269 buf[49] = 0;
2270 xmlEncodingErr(XML_I18N_CONV_FAILED,
2271 "input conversion failed due to input error, bytes %s\n",
2272 buf);
2273 }
2274 }
2275 /*
2276 * Ignore when input buffer is not on a boundary
2277 */
2278 if (ret == -3)
2279 ret = 0;
2280 return (c_out? c_out : ret);
2281}
2282
2283/**
Owen Taylor3473f882001-02-23 17:55:21 +00002284 * xmlCharEncInFunc:
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002285 * @handler: char encoding transformation data structure
Owen Taylor3473f882001-02-23 17:55:21 +00002286 * @out: an xmlBuffer for the output.
2287 * @in: an xmlBuffer for the input
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002288 *
Owen Taylor3473f882001-02-23 17:55:21 +00002289 * Generic front-end for the encoding handler input function
Daniel Veillard7e385bd2009-08-26 11:38:49 +02002290 *
2291 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002292 * -1 general error
2293 * -2 if the transcoding fails (for *in is not valid utf8 string or
2294 * the result of transformation can't fit into the encoding we want), or
2295 */
2296int
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002297xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2298 xmlBufferPtr in)
2299{
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002300 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002301 int written;
2302 int toconv;
2303
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002304 if (handler == NULL)
2305 return (-1);
2306 if (out == NULL)
2307 return (-1);
2308 if (in == NULL)
2309 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002310
2311 toconv = in->use;
2312 if (toconv == 0)
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002313 return (0);
Daniel Veillard69f04562011-08-19 11:05:04 +08002314 written = out->size - out->use -1; /* count '\0' */
Owen Taylor3473f882001-02-23 17:55:21 +00002315 if (toconv * 2 >= written) {
2316 xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002317 written = out->size - out->use - 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002318 }
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002319 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2320 in->content, &toconv);
2321 xmlBufferShrink(in, toconv);
2322 out->use += written;
2323 out->content[out->use] = 0;
2324 if (ret == -1)
2325 ret = -3;
2326
Owen Taylor3473f882001-02-23 17:55:21 +00002327 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002328 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002329#ifdef DEBUG_ENCODING
2330 xmlGenericError(xmlGenericErrorContext,
2331 "converted %d bytes to %d bytes of input\n",
2332 toconv, written);
Owen Taylor3473f882001-02-23 17:55:21 +00002333#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002334 break;
2335 case -1:
2336#ifdef DEBUG_ENCODING
2337 xmlGenericError(xmlGenericErrorContext,
2338 "converted %d bytes to %d bytes of input, %d left\n",
2339 toconv, written, in->use);
2340#endif
2341 break;
2342 case -3:
2343#ifdef DEBUG_ENCODING
2344 xmlGenericError(xmlGenericErrorContext,
2345 "converted %d bytes to %d bytes of input, %d left\n",
2346 toconv, written, in->use);
2347#endif
2348 break;
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002349 case -2: {
2350 char buf[50];
2351
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002352 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002353 in->content[0], in->content[1],
2354 in->content[2], in->content[3]);
2355 buf[49] = 0;
2356 xmlEncodingErr(XML_I18N_CONV_FAILED,
2357 "input conversion failed due to input error, bytes %s\n",
2358 buf);
2359 }
Owen Taylor3473f882001-02-23 17:55:21 +00002360 }
2361 /*
2362 * Ignore when input buffer is not on a boundary
2363 */
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002364 if (ret == -3)
2365 ret = 0;
Daniel Veillard2644ab22005-08-24 14:22:55 +00002366 return (written? written : ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002367}
2368
Denis Pauke28c8a12013-08-03 14:22:54 +03002369#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00002370/**
Daniel Veillard18d0db22012-07-13 19:51:15 +08002371 * xmlCharEncOutput:
Daniel Veillard28cc42d2012-08-10 10:00:18 +08002372 * @output: a parser output buffer
Daniel Veillard18d0db22012-07-13 19:51:15 +08002373 * @init: is this an initialization call without data
2374 *
2375 * Generic front-end for the encoding handler on parser output
2376 * a first call with @init == 1 has to be made first to initiate the
2377 * output in case of non-stateless encoding needing to initiate their
2378 * state or the output (like the BOM in UTF16).
2379 * In case of UTF8 sequence conversion errors for the given encoder,
2380 * the content will be automatically remapped to a CharRef sequence.
2381 *
2382 * Returns the number of byte written if success, or
2383 * -1 general error
2384 * -2 if the transcoding fails (for *in is not valid utf8 string or
2385 * the result of transformation can't fit into the encoding we want), or
2386 */
2387int
2388xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2389{
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002390 int ret;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002391 size_t written;
2392 size_t writtentot = 0;
2393 size_t toconv;
2394 int c_in;
2395 int c_out;
2396 xmlBufPtr in;
2397 xmlBufPtr out;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002398
2399 if ((output == NULL) || (output->encoder == NULL) ||
2400 (output->buffer == NULL) || (output->conv == NULL))
2401 return (-1);
2402 out = output->conv;
2403 in = output->buffer;
2404
2405retry:
2406
2407 written = xmlBufAvail(out);
2408 if (written > 0)
2409 written--; /* count '\0' */
2410
2411 /*
2412 * First specific handling of the initialization call
2413 */
2414 if (init) {
2415 c_in = 0;
2416 c_out = written;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002417 /* TODO: Check return value. */
2418 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2419 NULL, &c_in);
2420 xmlBufAddLen(out, c_out);
Daniel Veillard18d0db22012-07-13 19:51:15 +08002421#ifdef DEBUG_ENCODING
2422 xmlGenericError(xmlGenericErrorContext,
2423 "initialized encoder\n");
2424#endif
2425 return(0);
2426 }
2427
2428 /*
2429 * Conversion itself.
2430 */
2431 toconv = xmlBufUse(in);
2432 if (toconv == 0)
2433 return (0);
2434 if (toconv > 64 * 1024)
2435 toconv = 64 * 1024;
2436 if (toconv * 4 >= written) {
2437 xmlBufGrow(out, toconv * 4);
2438 written = xmlBufAvail(out) - 1;
2439 }
2440 if (written > 256 * 1024)
2441 written = 256 * 1024;
2442
2443 c_in = toconv;
2444 c_out = written;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002445 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2446 xmlBufContent(in), &c_in);
2447 xmlBufShrink(in, c_in);
2448 xmlBufAddLen(out, c_out);
2449 writtentot += c_out;
2450 if (ret == -1) {
Daniel Veillard18d0db22012-07-13 19:51:15 +08002451 if (c_out > 0) {
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002452 /* Can be a limitation of iconv or uconv */
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002453 goto retry;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002454 }
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002455 ret = -3;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002456 }
2457
2458 if (ret >= 0) output += ret;
2459
2460 /*
2461 * Attempt to handle error cases
2462 */
2463 switch (ret) {
2464 case 0:
2465#ifdef DEBUG_ENCODING
2466 xmlGenericError(xmlGenericErrorContext,
2467 "converted %d bytes to %d bytes of output\n",
2468 c_in, c_out);
2469#endif
2470 break;
2471 case -1:
2472#ifdef DEBUG_ENCODING
2473 xmlGenericError(xmlGenericErrorContext,
2474 "output conversion failed by lack of space\n");
2475#endif
2476 break;
2477 case -3:
2478#ifdef DEBUG_ENCODING
2479 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2480 c_in, c_out, (int) xmlBufUse(in));
2481#endif
2482 break;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002483 case -4:
2484 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2485 "xmlCharEncOutFunc: no output function !\n", NULL);
2486 ret = -1;
2487 break;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002488 case -2: {
Nick Wellnhofere5107772017-06-19 15:32:56 +02002489 xmlChar charref[20];
Daniel Veillard18d0db22012-07-13 19:51:15 +08002490 int len = (int) xmlBufUse(in);
2491 xmlChar *content = xmlBufContent(in);
Nick Wellnhofere5107772017-06-19 15:32:56 +02002492 int cur, charrefLen;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002493
2494 cur = xmlGetUTF8Char(content, &len);
Nick Wellnhofere5107772017-06-19 15:32:56 +02002495 if (cur <= 0)
Daniel Veillard18d0db22012-07-13 19:51:15 +08002496 break;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002497
2498#ifdef DEBUG_ENCODING
Nick Wellnhofere5107772017-06-19 15:32:56 +02002499 xmlGenericError(xmlGenericErrorContext,
2500 "handling output conversion error\n");
2501 xmlGenericError(xmlGenericErrorContext,
2502 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2503 content[0], content[1],
2504 content[2], content[3]);
Daniel Veillard18d0db22012-07-13 19:51:15 +08002505#endif
Nick Wellnhofere5107772017-06-19 15:32:56 +02002506 /*
2507 * Removes the UTF8 sequence, and replace it by a charref
2508 * and continue the transcoding phase, hoping the error
2509 * did not mangle the encoder state.
2510 */
2511 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2512 "&#%d;", cur);
2513 xmlBufShrink(in, len);
2514 xmlBufGrow(out, charrefLen * 4);
2515 c_out = xmlBufAvail(out) - 1;
2516 c_in = charrefLen;
2517 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2518 charref, &c_in);
Daniel Veillard18d0db22012-07-13 19:51:15 +08002519
Nick Wellnhofere5107772017-06-19 15:32:56 +02002520 if ((ret < 0) || (c_in != charrefLen)) {
Daniel Veillard18d0db22012-07-13 19:51:15 +08002521 char buf[50];
2522
2523 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2524 content[0], content[1],
2525 content[2], content[3]);
2526 buf[49] = 0;
2527 xmlEncodingErr(XML_I18N_CONV_FAILED,
2528 "output conversion failed due to conv error, bytes %s\n",
2529 buf);
2530 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2531 content[0] = ' ';
Nick Wellnhofere5107772017-06-19 15:32:56 +02002532 break;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002533 }
Nick Wellnhofere5107772017-06-19 15:32:56 +02002534
2535 xmlBufAddLen(out, c_out);
2536 writtentot += c_out;
2537 goto retry;
Daniel Veillard18d0db22012-07-13 19:51:15 +08002538 }
2539 }
2540 return(ret);
2541}
Denis Pauke28c8a12013-08-03 14:22:54 +03002542#endif
Daniel Veillard18d0db22012-07-13 19:51:15 +08002543
2544/**
Owen Taylor3473f882001-02-23 17:55:21 +00002545 * xmlCharEncOutFunc:
2546 * @handler: char enconding transformation data structure
2547 * @out: an xmlBuffer for the output.
2548 * @in: an xmlBuffer for the input
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002549 *
Owen Taylor3473f882001-02-23 17:55:21 +00002550 * Generic front-end for the encoding handler output function
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002551 * a first call with @in == NULL has to be made firs to initiate the
Owen Taylor3473f882001-02-23 17:55:21 +00002552 * output in case of non-stateless encoding needing to initiate their
2553 * state or the output (like the BOM in UTF16).
2554 * In case of UTF8 sequence conversion errors for the given encoder,
2555 * the content will be automatically remapped to a CharRef sequence.
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002556 *
2557 * Returns the number of byte written if success, or
Owen Taylor3473f882001-02-23 17:55:21 +00002558 * -1 general error
2559 * -2 if the transcoding fails (for *in is not valid utf8 string or
2560 * the result of transformation can't fit into the encoding we want), or
2561 */
2562int
2563xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2564 xmlBufferPtr in) {
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002565 int ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002566 int written;
2567 int writtentot = 0;
2568 int toconv;
2569 int output = 0;
2570
2571 if (handler == NULL) return(-1);
2572 if (out == NULL) return(-1);
2573
2574retry:
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002575
Owen Taylor3473f882001-02-23 17:55:21 +00002576 written = out->size - out->use;
2577
Igor Zlatkovic73267db2003-03-08 13:29:24 +00002578 if (written > 0)
2579 written--; /* Gennady: count '/0' */
2580
Owen Taylor3473f882001-02-23 17:55:21 +00002581 /*
2582 * First specific handling of in = NULL, i.e. the initialization call
2583 */
2584 if (in == NULL) {
2585 toconv = 0;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002586 /* TODO: Check return value. */
2587 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2588 NULL, &toconv);
2589 out->use += written;
2590 out->content[out->use] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002591#ifdef DEBUG_ENCODING
2592 xmlGenericError(xmlGenericErrorContext,
2593 "initialized encoder\n");
2594#endif
2595 return(0);
2596 }
2597
2598 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002599 * Conversion itself.
Owen Taylor3473f882001-02-23 17:55:21 +00002600 */
2601 toconv = in->use;
2602 if (toconv == 0)
2603 return(0);
Daniel Veillardf1245392008-04-03 09:46:34 +00002604 if (toconv * 4 >= written) {
2605 xmlBufferGrow(out, toconv * 4);
Owen Taylor3473f882001-02-23 17:55:21 +00002606 written = out->size - out->use - 1;
2607 }
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002608 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2609 in->content, &toconv);
2610 xmlBufferShrink(in, toconv);
2611 out->use += written;
2612 writtentot += written;
2613 out->content[out->use] = 0;
2614 if (ret == -1) {
2615 if (written > 0) {
2616 /* Can be a limitation of iconv or uconv */
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002617 goto retry;
2618 }
2619 ret = -3;
Owen Taylor3473f882001-02-23 17:55:21 +00002620 }
2621
2622 if (ret >= 0) output += ret;
2623
2624 /*
2625 * Attempt to handle error cases
2626 */
2627 switch (ret) {
Owen Taylor3473f882001-02-23 17:55:21 +00002628 case 0:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002629#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002630 xmlGenericError(xmlGenericErrorContext,
2631 "converted %d bytes to %d bytes of output\n",
2632 toconv, written);
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002633#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002634 break;
2635 case -1:
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002636#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002637 xmlGenericError(xmlGenericErrorContext,
2638 "output conversion failed by lack of space\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002639#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002640 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002641 case -3:
Daniel Veillard809faa52003-02-10 15:43:53 +00002642#ifdef DEBUG_ENCODING
Owen Taylor3473f882001-02-23 17:55:21 +00002643 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2644 toconv, written, in->use);
Daniel Veillard809faa52003-02-10 15:43:53 +00002645#endif
Owen Taylor3473f882001-02-23 17:55:21 +00002646 break;
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002647 case -4:
2648 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2649 "xmlCharEncOutFunc: no output function !\n", NULL);
2650 ret = -1;
2651 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 case -2: {
Nick Wellnhofere5107772017-06-19 15:32:56 +02002653 xmlChar charref[20];
Owen Taylor3473f882001-02-23 17:55:21 +00002654 int len = in->use;
2655 const xmlChar *utf = (const xmlChar *) in->content;
Nick Wellnhofere5107772017-06-19 15:32:56 +02002656 int cur, charrefLen;
Owen Taylor3473f882001-02-23 17:55:21 +00002657
2658 cur = xmlGetUTF8Char(utf, &len);
Nick Wellnhofere5107772017-06-19 15:32:56 +02002659 if (cur <= 0)
Timothy Elliott689408b2012-05-08 22:03:22 +08002660 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002661
2662#ifdef DEBUG_ENCODING
Nick Wellnhofere5107772017-06-19 15:32:56 +02002663 xmlGenericError(xmlGenericErrorContext,
2664 "handling output conversion error\n");
2665 xmlGenericError(xmlGenericErrorContext,
2666 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2667 in->content[0], in->content[1],
2668 in->content[2], in->content[3]);
Owen Taylor3473f882001-02-23 17:55:21 +00002669#endif
Nick Wellnhofere5107772017-06-19 15:32:56 +02002670 /*
2671 * Removes the UTF8 sequence, and replace it by a charref
2672 * and continue the transcoding phase, hoping the error
2673 * did not mangle the encoder state.
2674 */
2675 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2676 "&#%d;", cur);
2677 xmlBufferShrink(in, len);
2678 xmlBufferGrow(out, charrefLen * 4);
2679 written = out->size - out->use - 1;
2680 toconv = charrefLen;
2681 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2682 charref, &toconv);
Owen Taylor3473f882001-02-23 17:55:21 +00002683
Nick Wellnhofere5107772017-06-19 15:32:56 +02002684 if ((ret < 0) || (toconv != charrefLen)) {
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002685 char buf[50];
2686
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002687 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
Daniel Veillard1fc3ed02005-08-24 12:46:09 +00002688 in->content[0], in->content[1],
2689 in->content[2], in->content[3]);
2690 buf[49] = 0;
2691 xmlEncodingErr(XML_I18N_CONV_FAILED,
2692 "output conversion failed due to conv error, bytes %s\n",
2693 buf);
Daniel Veillarddf750622006-05-02 12:24:06 +00002694 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2695 in->content[0] = ' ';
Nick Wellnhofere5107772017-06-19 15:32:56 +02002696 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002697 }
Nick Wellnhofere5107772017-06-19 15:32:56 +02002698
2699 out->use += written;
2700 writtentot += written;
2701 out->content[out->use] = 0;
2702 goto retry;
Owen Taylor3473f882001-02-23 17:55:21 +00002703 }
2704 }
2705 return(ret);
2706}
2707
2708/**
2709 * xmlCharEncCloseFunc:
2710 * @handler: char enconding transformation data structure
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002711 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002712 * Generic front-end for encoding handler close function
Owen Taylor3473f882001-02-23 17:55:21 +00002713 *
2714 * Returns 0 if success, or -1 in case of error
2715 */
2716int
2717xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2718 int ret = 0;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002719 int tofree = 0;
Gaurav080a22c2013-11-29 23:10:50 +08002720 int i, handler_in_list = 0;
2721
Owen Taylor3473f882001-02-23 17:55:21 +00002722 if (handler == NULL) return(-1);
2723 if (handler->name == NULL) return(-1);
Gaurav080a22c2013-11-29 23:10:50 +08002724 if (handlers != NULL) {
2725 for (i = 0;i < nbCharEncodingHandler; i++) {
2726 if (handler == handlers[i]) {
2727 handler_in_list = 1;
2728 break;
2729 }
2730 }
2731 }
Owen Taylor3473f882001-02-23 17:55:21 +00002732#ifdef LIBXML_ICONV_ENABLED
2733 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002734 * Iconv handlers can be used only once, free the whole block.
Owen Taylor3473f882001-02-23 17:55:21 +00002735 * and the associated icon resources.
2736 */
Gaurav080a22c2013-11-29 23:10:50 +08002737 if ((handler_in_list == 0) &&
2738 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002739 tofree = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002740 if (handler->iconv_out != NULL) {
2741 if (iconv_close(handler->iconv_out))
2742 ret = -1;
2743 handler->iconv_out = NULL;
2744 }
2745 if (handler->iconv_in != NULL) {
2746 if (iconv_close(handler->iconv_in))
2747 ret = -1;
2748 handler->iconv_in = NULL;
2749 }
Owen Taylor3473f882001-02-23 17:55:21 +00002750 }
2751#endif /* LIBXML_ICONV_ENABLED */
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002752#ifdef LIBXML_ICU_ENABLED
Gaurav080a22c2013-11-29 23:10:50 +08002753 if ((handler_in_list == 0) &&
2754 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002755 tofree = 1;
2756 if (handler->uconv_out != NULL) {
2757 closeIcuConverter(handler->uconv_out);
2758 handler->uconv_out = NULL;
2759 }
2760 if (handler->uconv_in != NULL) {
2761 closeIcuConverter(handler->uconv_in);
2762 handler->uconv_in = NULL;
2763 }
2764 }
2765#endif
2766 if (tofree) {
2767 /* free up only dynamic handlers iconv/uconv */
2768 if (handler->name != NULL)
2769 xmlFree(handler->name);
2770 handler->name = NULL;
2771 xmlFree(handler);
2772 }
Owen Taylor3473f882001-02-23 17:55:21 +00002773#ifdef DEBUG_ENCODING
2774 if (ret)
2775 xmlGenericError(xmlGenericErrorContext,
2776 "failed to close the encoding handler\n");
2777 else
2778 xmlGenericError(xmlGenericErrorContext,
2779 "closed the encoding handler\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002780#endif
Daniel Veillardd79bcd12001-06-21 22:07:42 +00002781
Owen Taylor3473f882001-02-23 17:55:21 +00002782 return(ret);
2783}
2784
Daniel Veillard36711902004-02-11 13:25:26 +00002785/**
2786 * xmlByteConsumed:
2787 * @ctxt: an XML parser context
2788 *
2789 * This function provides the current index of the parser relative
2790 * to the start of the current entity. This function is computed in
2791 * bytes from the beginning starting at zero and finishing at the
2792 * size in byte of the file if parsing a file. The function is
2793 * of constant cost if the input is UTF-8 but can be costly if run
2794 * on non-UTF-8 input.
2795 *
2796 * Returns the index in bytes from the beginning of the entity or -1
2797 * in case the index could not be computed.
2798 */
2799long
2800xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2801 xmlParserInputPtr in;
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002802
Daniel Veillard36711902004-02-11 13:25:26 +00002803 if (ctxt == NULL) return(-1);
2804 in = ctxt->input;
2805 if (in == NULL) return(-1);
2806 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2807 unsigned int unused = 0;
2808 xmlCharEncodingHandler * handler = in->buf->encoder;
2809 /*
2810 * Encoding conversion, compute the number of unused original
2811 * bytes from the input not consumed and substract that from
2812 * the raw consumed value, this is not a cheap operation
2813 */
2814 if (in->end - in->cur > 0) {
Daniel Veillardcffc1c72005-03-12 18:54:55 +00002815 unsigned char convbuf[32000];
William M. Brack13dfa872004-09-18 04:52:08 +00002816 const unsigned char *cur = (const unsigned char *)in->cur;
Daniel Veillard36711902004-02-11 13:25:26 +00002817 int toconv = in->end - in->cur, written = 32000;
2818
2819 int ret;
2820
Nick Wellnhoferc9ccbd62017-06-19 14:57:43 +02002821 do {
2822 toconv = in->end - cur;
2823 written = 32000;
2824 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2825 cur, &toconv);
2826 if (ret < 0) {
2827 if (written > 0)
2828 ret = -2;
2829 else
2830 return(-1);
2831 }
2832 unused += written;
2833 cur += toconv;
2834 } while (ret == -2);
Daniel Veillard36711902004-02-11 13:25:26 +00002835 }
2836 if (in->buf->rawconsumed < unused)
2837 return(-1);
2838 return(in->buf->rawconsumed - unused);
2839 }
2840 return(in->consumed + (in->cur - in->base));
2841}
2842
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01002843#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
Daniel Veillard01fc1a92003-07-30 15:12:01 +00002844#ifdef LIBXML_ISO8859X_ENABLED
2845
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are written: when @out is
 * full the conversion stops without error and the remaining input is
 * left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails, -3 if the input ends inside a multi-byte sequence, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed;
 * only completely converted characters are counted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* A negative capacity is treated as an empty output buffer. */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every character yields exactly one byte: stop when @out is full */
        if (out >= outend)
            break;
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64-byte block, second level the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        /* only now is the whole character accounted as consumed */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2964
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the code point of the bytes
 *                0x80..0xFF (indexed by byte - 0x80), 0 when undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early when @out runs out of room.
 * Returns the number of bytes written if success, or -1 otherwise
 * (NULL argument or undefined input byte)
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * A non-ASCII byte needs up to 3 output bytes, so the main loop only
     * runs while more than 2 bytes are free. Written as a difference so
     * that no pointer before @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than @out can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds check first: *in must not be read at or past instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* the last two output bytes can still take ASCII characters */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3030
Daniel Veillard1cc912e2010-11-03 19:26:35 +01003031
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003032/************************************************************************
3033 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3034 ************************************************************************/
3035
/*
 * Code points for the ISO-8859-2 bytes 0x80..0xFF, one entry per byte,
 * indexed by (byte - 0x80). Presumably handed to ISO8859xToUTF8 as its
 * unicodetable argument.
 */
static const unsigned short xmlunicodetable_ISO8859_2[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, /* 0xa0 */
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, /* 0xa8 */
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, /* 0xb0 */
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, /* 0xb8 */
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, /* 0xc0 */
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, /* 0xc8 */
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, /* 0xd0 */
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, /* 0xd8 */
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, /* 0xe0 */
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, /* 0xe8 */
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, /* 0xf0 */
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, /* 0xf8 */
};
3054
/*
 * Two-level UTF-8 -> ISO-8859-2 lookup table in the layout read by
 * UTF8ToISO8859x: 32 index bytes for 2-byte lead bytes, 16 index bytes
 * for 3-byte lead bytes, then six 64-byte blocks. A 0 entry means
 * "not in the character set".
 */
static const unsigned char xmltranscodetable_ISO8859_2[48 + 6 * 64] = {
    /* block index per 2-byte lead byte (lead & 0x1F) */
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block index per 3-byte lead byte (lead & 0x0F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    /* block 5 */
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3084
/*
 * Code points for the ISO-8859-3 bytes 0x80..0xFF, one entry per byte,
 * indexed by (byte - 0x80); 0x0000 stands for a byte without mapping.
 * Presumably handed to ISO8859xToUTF8 as its unicodetable argument.
 */
static const unsigned short xmlunicodetable_ISO8859_3[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, /* 0xa0 */
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, /* 0xa8 */
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, /* 0xb0 */
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, /* 0xb8 */
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, /* 0xc0 */
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0xc8 */
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, /* 0xd0 */
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, /* 0xd8 */
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, /* 0xe0 */
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0xe8 */
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, /* 0xf0 */
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, /* 0xf8 */
};
3103
/*
 * Two-level UTF-8 -> ISO-8859-3 lookup table in the layout read by
 * UTF8ToISO8859x: 32 index bytes for 2-byte lead bytes, 16 index bytes
 * for 3-byte lead bytes, then seven 64-byte blocks. A 0 entry means
 * "not in the character set".
 */
static const unsigned char xmltranscodetable_ISO8859_3[48 + 7 * 64] = {
    /* block index per 2-byte lead byte (lead & 0x1F) */
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block index per 3-byte lead byte (lead & 0x0F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    /* block 2 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    /* block 6 */
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3137
/*
 * Code points for the ISO-8859-4 bytes 0x80..0xFF, one entry per byte,
 * indexed by (byte - 0x80). Presumably handed to ISO8859xToUTF8 as its
 * unicodetable argument.
 */
static const unsigned short xmlunicodetable_ISO8859_4[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 */
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, /* 0xa0 */
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, /* 0xa8 */
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, /* 0xb0 */
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, /* 0xb8 */
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, /* 0xc0 */
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, /* 0xc8 */
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, /* 0xd0 */
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, /* 0xd8 */
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, /* 0xe0 */
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, /* 0xe8 */
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, /* 0xf0 */
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, /* 0xf8 */
};
3156
/*
 * Two-level UTF-8 -> ISO-8859-4 lookup table in the layout read by
 * UTF8ToISO8859x: 32 index bytes for 2-byte lead bytes, 16 index bytes
 * for 3-byte lead bytes, then six 64-byte blocks. A 0 entry means
 * "not in the character set".
 */
static const unsigned char xmltranscodetable_ISO8859_4[48 + 6 * 64] = {
    /* block index per 2-byte lead byte (lead & 0x1F) */
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block index per 3-byte lead byte (lead & 0x0F) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    /* block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3186
3187static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003188 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3189 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3190 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3191 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3192 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3193 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3194 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3195 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3196 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3197 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3198 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3199 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3200 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3201 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3202 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3203 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003204};
3205
3206static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3207 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3215 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3216 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3219 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3220 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3221 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3222 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3223 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234};
3235
3236static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003237 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3238 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3239 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3240 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3241 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3242 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3243 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3244 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3245 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3246 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3247 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3248 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3249 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3250 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3251 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003253};
3254
3255static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3256 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3258 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3264 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3265 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3273 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3274 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3275 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3276 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279};
3280
3281static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003282 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3283 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3284 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3285 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3286 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3287 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3288 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3289 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3290 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3291 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3292 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3293 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3294 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3295 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3296 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3297 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003298};
3299
3300static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3301 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3302 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3309 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3310 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3311 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3312 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3325 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3327 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332};
3333
3334static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003335 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3336 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3337 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3338 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3339 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3340 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3341 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3342 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3343 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3344 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3345 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3346 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3347 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3348 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3349 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3350 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003351};
3352
3353static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3354 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3356 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3362 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3363 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3364 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3365 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3373 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3378 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3383 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385};
3386
3387static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003388 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3389 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3390 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3391 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3392 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3393 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3394 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3395 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3396 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3397 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3398 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3399 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3400 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3401 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3402 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3403 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003404};
3405
3406static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3407 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3415 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3416 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3417 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3418 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3419 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3420 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3421 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430};
3431
3432static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003433 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3434 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3435 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3436 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3437 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3438 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3439 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3440 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3441 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3442 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3443 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3444 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3445 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3446 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3447 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3448 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003449};
3450
3451static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3452 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3460 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3461 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3462 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3463 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3464 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3465 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3466 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3467 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3470 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3471 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3480 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3481 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3482 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3483};
3484
3485static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003486 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3487 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3488 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3489 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3490 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3491 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3492 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3493 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3494 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3495 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3496 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3497 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3498 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3499 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3500 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3501 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003502};
3503
3504static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3505 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3513 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3514 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3520 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3524 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3529 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532};
3533
3534static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003535 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3536 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3537 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3538 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3539 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3540 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3541 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3542 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3543 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3544 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3545 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3546 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3547 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3548 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3549 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3550 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003551};
3552
3553static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3554 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3562 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3563 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3564 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3565 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3574 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3575 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3577 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3579 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3580 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3581 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3582 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3583 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3584 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3585};
3586
3587static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01003588 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3589 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3590 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3591 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3592 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3593 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3594 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3595 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3596 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3597 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3598 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3599 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3600 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3601 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3602 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3603 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
Daniel Veillard01fc1a92003-07-30 15:12:01 +00003604};
3605
3606static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3607 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3615 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3616 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3617 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3622 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3642 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3644 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3647 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3648 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3649 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3650};
3651
/*
 * xmlunicodetable_ISO8859_15:
 * 128 code points handed to ISO8859xToUTF8 by ISO8859_15ToUTF8.
 * Entries start at 0x0080 and equal 0x80 + index everywhere except eight
 * slots, which hold 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153
 * and 0x0178.
 * NOTE(review): presumably entry i is the Unicode value of input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3670
/*
 * xmltranscodetable_ISO8859_15:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * As the declared size spells out, it is a 48-byte header followed by six
 * 64-byte blocks.  The header and block 2 hold small numbers (0..5) that
 * look like block indices; the other non-zero bytes are values in the
 * range 0x80..0xff.  The initializer fills the array exactly, so there is
 * no trailing NUL.
 * NOTE(review): presumably the header is indexed by bits of the UTF-8 lead
 * byte and each block by the low six bits of a continuation byte, with 0
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    /* 48-byte header */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* block 2 */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* block 5 */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3700
/*
 * xmlunicodetable_ISO8859_16:
 * 128 code points handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * The first 33 entries are 0x0080..0x00a0 in order; from there on the
 * table mixes values equal to 0x80 + index with values above 0x00ff
 * (for example 0x0104, 0x20ac, 0x0218).
 * NOTE(review): presumably entry i is the Unicode value of input byte
 * 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3719
/*
 * xmltranscodetable_ISO8859_16:
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * As the declared size spells out, it is a 48-byte header followed by nine
 * 64-byte blocks.  The header and block 4 hold small numbers (0..8) that
 * look like block indices; the other non-zero bytes are values in the
 * range 0x80..0xff.  The initializer fills the array exactly, so there is
 * no trailing NUL.
 * NOTE(review): presumably the header is indexed by bits of the UTF-8 lead
 * byte and each block by the low six bits of a continuation byte, with 0
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* 48-byte header */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* block 4 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 6 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 7 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 8 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3761
3762
/*
 * Auto-generated wrapper functions for ISO-8859-2 .. ISO-8859-16.
 */
3766
3767static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3768 const unsigned char* in, int *inlen) {
3769 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3770}
3771static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3772 const unsigned char* in, int *inlen) {
3773 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3774}
3775
3776static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3777 const unsigned char* in, int *inlen) {
3778 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3779}
3780static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3781 const unsigned char* in, int *inlen) {
3782 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3783}
3784
3785static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3786 const unsigned char* in, int *inlen) {
3787 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3788}
3789static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3790 const unsigned char* in, int *inlen) {
3791 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3792}
3793
3794static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3795 const unsigned char* in, int *inlen) {
3796 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3797}
3798static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3799 const unsigned char* in, int *inlen) {
3800 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3801}
3802
3803static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3804 const unsigned char* in, int *inlen) {
3805 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3806}
3807static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3808 const unsigned char* in, int *inlen) {
3809 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3810}
3811
3812static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3813 const unsigned char* in, int *inlen) {
3814 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3815}
3816static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3817 const unsigned char* in, int *inlen) {
3818 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3819}
3820
3821static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3822 const unsigned char* in, int *inlen) {
3823 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3824}
3825static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3826 const unsigned char* in, int *inlen) {
3827 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3828}
3829
3830static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3831 const unsigned char* in, int *inlen) {
3832 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3833}
3834static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3835 const unsigned char* in, int *inlen) {
3836 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3837}
3838
3839static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3840 const unsigned char* in, int *inlen) {
3841 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3842}
3843static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3844 const unsigned char* in, int *inlen) {
3845 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3846}
3847
3848static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3849 const unsigned char* in, int *inlen) {
3850 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3851}
3852static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3853 const unsigned char* in, int *inlen) {
3854 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3855}
3856
3857static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3858 const unsigned char* in, int *inlen) {
3859 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3860}
3861static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3862 const unsigned char* in, int *inlen) {
3863 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3864}
3865
3866static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3867 const unsigned char* in, int *inlen) {
3868 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3869}
3870static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3871 const unsigned char* in, int *inlen) {
3872 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3873}
3874
3875static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3876 const unsigned char* in, int *inlen) {
3877 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3878}
3879static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3880 const unsigned char* in, int *inlen) {
3881 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3882}
3883
3884static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3885 const unsigned char* in, int *inlen) {
3886 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3887}
3888static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3889 const unsigned char* in, int *inlen) {
3890 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3891}
3892
3893static void
3894xmlRegisterCharEncodingHandlersISO8859x (void) {
3895 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3896 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3897 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3898 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3899 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3900 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3901 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3902 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3903 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3904 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3905 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3906 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3907 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3908 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3909}
3910
3911#endif
3912#endif
3913
Daniel Veillard5d4644e2005-04-01 13:11:58 +00003914#define bottom_encoding
3915#include "elfgcchack.h"