Blame - encoding.c - fp2-dev/platform/external/libxml2

blob: fab241e6d2fa98a1b08357c20bd39279bcf0b0a0 [file] [log] [blame]

Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1	/*
				2	* encoding.c : implements the encoding conversion functions needed for XML
				3	*
				4	* Related specs:
				5	* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
				6	* rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
				7	* [ISO-10646] UTF-8 and UTF-16 in Annexes
				8	* [ISO-8859-1] ISO Latin-1 characters codes.
				9	* [UNICODE] The Unicode Consortium, "The Unicode Standard --
				10	* Worldwide Character Encoding -- Version 1.0", Addison-
				11	* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
				12	* described in Unicode Technical Report #4.
				13	* [US-ASCII] Coded Character Set--7-bit American Standard Code for
				14	* Information Interchange, ANSI X3.4-1986.
				15	*
				16	* Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
				17	*
				18	* See Copyright for the status of this software.
				19	*
				20	* Daniel.Veillard@w3.org
				21	*/
				22
				23	#ifdef WIN32
				24	#include "win32config.h"
				25	#else
				26	#include "config.h"
				27	#endif
				28
				29	#include <stdio.h>
				30	#include <string.h>
				31
				32	#ifdef HAVE_CTYPE_H
				33	#include <ctype.h>
				34	#endif
				35	#ifdef HAVE_STDLIB_H
				36	#include <stdlib.h>
				37	#endif
				38	#include <libxml/xmlversion.h>
				39	#ifdef LIBXML_ICONV_ENABLED
				40	#ifdef HAVE_ERRNO_H
				41	#include <errno.h>
				42	#endif
				43	#endif
				44	#include <libxml/encoding.h>
				45	#include <libxml/xmlmemory.h>
				46	#ifdef LIBXML_HTML_ENABLED
				47	#include <libxml/HTMLparser.h>
				48	#endif
				49	#include <libxml/xmlerror.h>
				50
				51	xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
				52	xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
				53
				/* One entry of the encoding alias table: associates an alias with an encoding name. */
				54	typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
				55	typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
				56	struct _xmlCharEncodingAlias {
				57	const char *name;	/* encoding name the alias resolves to */
				58	const char *alias;	/* alias string; xmlAddEncodingAlias stores it upper-cased */
				59	};
				60
				61	static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
				62	static int xmlCharEncodingAliasesNb = 0;
				63	static int xmlCharEncodingAliasesMax = 0;
				64
				65	#ifdef LIBXML_ICONV_ENABLED
				66	#if 0
				67	#define DEBUG_ENCODING /* Define this to get encoding traces */
				68	#endif
				69	#endif
				70
				71	static int xmlLittleEndian = 1;
				72
				73	/*
				74	* From rfc2044: encoding of the Unicode values on UTF-8:
				75	*
				76	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				77	* 0000 0000-0000 007F 0xxxxxxx
				78	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				79	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				80	*
				81	* I hope we won't use values > 0xFFFF anytime soon !
				82	*/
				83
				84	/**
				85	* xmlGetUTF8Char:
				86	* @utf: a sequence of UTF-8 encoded bytes
				87	* @len: a pointer to @bytes len
				88	*
				89	* Read one UTF8 Char from @utf
				90	*
				91	* Returns the char value or -1 in case of error and update @len with the
				92	* number of bytes used
				93	*/
				94	int
				95	xmlGetUTF8Char(const unsigned char utf, int len) {
				96	unsigned int c;
				97
				98	if (utf == NULL)
				99	goto error;
				100	if (len == NULL)
				101	goto error;
				102	if (*len < 1)
				103	goto error;
				104
				105	c = utf[0];
				106	if (c & 0x80) {
				107	if (*len < 2)
				108	goto error;
				109	if ((utf[1] & 0xc0) != 0x80)
				110	goto error;
				111	if ((c & 0xe0) == 0xe0) {
				112	if (*len < 3)
				113	goto error;
				114	if ((utf[2] & 0xc0) != 0x80)
				115	goto error;
				116	if ((c & 0xf0) == 0xf0) {
				117	if (*len < 4)
				118	goto error;
				119	if ((c & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				120	goto error;
				121	*len = 4;
				122	/* 4-byte code */
				123	c = (utf[0] & 0x7) << 18;
				124	c \|= (utf[1] & 0x3f) << 12;
				125	c \|= (utf[2] & 0x3f) << 6;
				126	c \|= utf[3] & 0x3f;
				127	} else {
				128	/* 3-byte code */
				129	*len = 3;
				130	c = (utf[0] & 0xf) << 12;
				131	c \|= (utf[1] & 0x3f) << 6;
				132	c \|= utf[2] & 0x3f;
				133	}
				134	} else {
				135	/* 2-byte code */
				136	*len = 2;
				137	c = (utf[0] & 0x1f) << 6;
				138	c \|= utf[1] & 0x3f;
				139	}
				140	} else {
				141	/* 1-byte code */
				142	*len = 1;
				143	}
				144	return(c);
				145
				146	error:
				147	*len = 0;
				148	return(-1);
				149	}
				150
				151	/**
				152	* xmlCheckUTF8: Check utf-8 string for legality.
				153	* @utf: Pointer to putative utf-8 encoded string.
				154	*
				155	* Checks @utf for being valid utf-8. @utf is assumed to be
				156	* null-terminated. This function is not super-strict, as it will
				157	* allow longer utf-8 sequences than necessary. Note that Java is
				158	* capable of producing these sequences if provoked. Also note, this
				159	* routine checks for the 4-byte maxiumum size, but does not check for
				160	* 0x10ffff maximum value.
				161	*
				162	* Return value: true if @utf is valid.
				163	**/
				164	int
				165	xmlCheckUTF8(const unsigned char *utf)
				166	{
				167	int ix;
				168	unsigned char c;
				169
				170	for (ix = 0; (c = utf[ix]);) {
				171	if (c & 0x80) {
				172	if ((utf[ix + 1] & 0xc0) != 0x80)
				173	return(0);
				174	if ((c & 0xe0) == 0xe0) {
				175	if ((utf[ix + 2] & 0xc0) != 0x80)
				176	return(0);
				177	if ((c & 0xf0) == 0xf0) {
				178	if ((c & 0xf8) != 0xf0 \|\| (utf[ix + 3] & 0xc0) != 0x80)
				179	return(0);
				180	ix += 4;
				181	/* 4-byte code */
				182	} else
				183	/* 3-byte code */
				184	ix += 3;
				185	} else
				186	/* 2-byte code */
				187	ix += 2;
				188	} else
				189	/* 1-byte code */
				190	ix++;
				191	}
				192	return(1);
				193	}
				194
				195	/**
				196	* asciiToUTF8:
				197	* @out: a pointer to an array of bytes to store the result
				198	* @outlen: the length of @out
				199	* @in: a pointer to an array of ASCII chars
				200	* @inlen: the length of @in
				201	*
				202	* Take a block of ASCII chars in and try to convert it to an UTF-8
				203	* block of chars out.
				204	* Returns 0 if success, or -1 otherwise
				205	* The value of @inlen after return is the number of octets consumed
				206	* as the return value is positive, else unpredictiable.
				207	* The value of @outlen after return is the number of ocetes consumed.
				208	*/
				209	int
				210	asciiToUTF8(unsigned char* out, int *outlen,
				211	const unsigned char* in, int *inlen) {
				212	unsigned char* outstart = out;
				213	const unsigned char* base = in;
				214	const unsigned char* processed = in;
				215	unsigned char* outend = out + *outlen;
				216	const unsigned char* inend;
				217	unsigned int c;
				218	int bits;
				219
				220	inend = in + (*inlen);
				221	while ((in < inend) && (out - outstart + 5 < *outlen)) {
				222	c= *in++;
				223
				224	/* assertion: c is a single UTF-4 value */
				225	if (out >= outend)
				226	break;
				227	if (c < 0x80) { *out++= c; bits= -6; }
				228	else {
				229	*outlen = out - outstart;
				230	*inlen = processed - base;
				231	return(-1);
				232	}
				233
				234	for ( ; bits >= 0; bits-= 6) {
				235	if (out >= outend)
				236	break;
				237	*out++= ((c >> bits) & 0x3F) \| 0x80;
				238	}
				239	processed = (const unsigned char*) in;
				240	}
				241	*outlen = out - outstart;
				242	*inlen = processed - base;
				243	return(0);
				244	}
				245
				246	/**
				247	* UTF8Toascii:
				248	* @out: a pointer to an array of bytes to store the result
				249	* @outlen: the length of @out
				250	* @in: a pointer to an array of UTF-8 chars
				251	* @inlen: the length of @in
				252	*
				253	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				254	* block of chars out.
				255	*
				256	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				257	* The value of @inlen after return is the number of octets consumed
				258	* as the return value is positive, else unpredictiable.
				259	* The value of @outlen after return is the number of ocetes consumed.
				260	*/
				261	int
				262	UTF8Toascii(unsigned char* out, int *outlen,
				263	const unsigned char* in, int *inlen) {
				264	const unsigned char* processed = in;
				265	const unsigned char* outend;
				266	const unsigned char* outstart = out;
				267	const unsigned char* instart = in;
				268	const unsigned char* inend;
				269	unsigned int c, d;
				270	int trailing;
				271
				272	if (in == NULL) {
				273	/*
				274	* initialization nothing to do
				275	*/
				276	*outlen = 0;
				277	*inlen = 0;
				278	return(0);
				279	}
				280	inend = in + (*inlen);
				281	outend = out + (*outlen);
				282	while (in < inend) {
				283	d = *in++;
				284	if (d < 0x80) { c= d; trailing= 0; }
				285	else if (d < 0xC0) {
				286	/* trailing byte in leading position */
				287	*outlen = out - outstart;
				288	*inlen = processed - instart;
				289	return(-2);
				290	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				291	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				292	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				293	else {
				294	/* no chance for this in Ascii */
				295	*outlen = out - outstart;
				296	*inlen = processed - instart;
				297	return(-2);
				298	}
				299
				300	if (inend - in < trailing) {
				301	break;
				302	}
				303
				304	for ( ; trailing; trailing--) {
				305	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
				306	break;
				307	c <<= 6;
				308	c \|= d & 0x3F;
				309	}
				310
				311	/* assertion: c is a single UTF-4 value */
				312	if (c < 0x80) {
				313	if (out >= outend)
				314	break;
				315	*out++ = c;
				316	} else {
				317	/* no chance for this in Ascii */
				318	*outlen = out - outstart;
				319	*inlen = processed - instart;
				320	return(-2);
				321	}
				322	processed = in;
				323	}
				324	*outlen = out - outstart;
				325	*inlen = processed - instart;
				326	return(0);
				327	}
				328
				329	/**
				330	* isolat1ToUTF8:
				331	* @out: a pointer to an array of bytes to store the result
				332	* @outlen: the length of @out
				333	* @in: a pointer to an array of ISO Latin 1 chars
				334	* @inlen: the length of @in
				335	*
				336	* Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
				337	* block of chars out.
				338	* Returns 0 if success, or -1 otherwise
				339	* The value of @inlen after return is the number of octets consumed
				340	* as the return value is positive, else unpredictiable.
				341	* The value of @outlen after return is the number of ocetes consumed.
				342	*/
				343	int
				344	isolat1ToUTF8(unsigned char* out, int *outlen,
				345	const unsigned char* in, int *inlen) {
				346	unsigned char* outstart = out;
				347	const unsigned char* base = in;
				348	const unsigned char* processed = in;
				349	unsigned char* outend = out + *outlen;
				350	const unsigned char* inend;
				351	unsigned int c;
				352	int bits;
				353
				354	inend = in + (*inlen);
				355	while ((in < inend) && (out - outstart + 5 < *outlen)) {
				356	c= *in++;
				357
				358	/* assertion: c is a single UTF-4 value */
				359	if (out >= outend)
				360	break;
				361	if (c < 0x80) { *out++= c; bits= -6; }
				362	else { *out++= ((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				363
				364	for ( ; bits >= 0; bits-= 6) {
				365	if (out >= outend)
				366	break;
				367	*out++= ((c >> bits) & 0x3F) \| 0x80;
				368	}
				369	processed = (const unsigned char*) in;
				370	}
				371	*outlen = out - outstart;
				372	*inlen = processed - base;
				373	return(0);
				374	}
				375
				376	/**
				377	* UTF8Toisolat1:
				378	* @out: a pointer to an array of bytes to store the result
				379	* @outlen: the length of @out
				380	* @in: a pointer to an array of UTF-8 chars
				381	* @inlen: the length of @in
				382	*
				383	* Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
				384	* block of chars out.
				385	*
				386	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				387	* The value of @inlen after return is the number of octets consumed
				388	* as the return value is positive, else unpredictiable.
				389	* The value of @outlen after return is the number of ocetes consumed.
				390	*/
				391	int
				392	UTF8Toisolat1(unsigned char* out, int *outlen,
				393	const unsigned char* in, int *inlen) {
				394	const unsigned char* processed = in;
				395	const unsigned char* outend;
				396	const unsigned char* outstart = out;
				397	const unsigned char* instart = in;
				398	const unsigned char* inend;
				399	unsigned int c, d;
				400	int trailing;
				401
				402	if (in == NULL) {
				403	/*
				404	* initialization nothing to do
				405	*/
				406	*outlen = 0;
				407	*inlen = 0;
				408	return(0);
				409	}
				410	inend = in + (*inlen);
				411	outend = out + (*outlen);
				412	while (in < inend) {
				413	d = *in++;
				414	if (d < 0x80) { c= d; trailing= 0; }
				415	else if (d < 0xC0) {
				416	/* trailing byte in leading position */
				417	*outlen = out - outstart;
				418	*inlen = processed - instart;
				419	return(-2);
				420	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				421	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				422	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				423	else {
				424	/* no chance for this in IsoLat1 */
				425	*outlen = out - outstart;
				426	*inlen = processed - instart;
				427	return(-2);
				428	}
				429
				430	if (inend - in < trailing) {
				431	break;
				432	}
				433
				434	for ( ; trailing; trailing--) {
				435	if (in >= inend)
				436	break;
				437	if (((d= *in++) & 0xC0) != 0x80) {
				438	*outlen = out - outstart;
				439	*inlen = processed - instart;
				440	return(-2);
				441	}
				442	c <<= 6;
				443	c \|= d & 0x3F;
				444	}
				445
				446	/* assertion: c is a single UTF-4 value */
				447	if (c <= 0xFF) {
				448	if (out >= outend)
				449	break;
				450	*out++ = c;
				451	} else {
				452	/* no chance for this in IsoLat1 */
				453	*outlen = out - outstart;
				454	*inlen = processed - instart;
				455	return(-2);
				456	}
				457	processed = in;
				458	}
				459	*outlen = out - outstart;
				460	*inlen = processed - instart;
				461	return(0);
				462	}
				463
				464	/**
				465	* UTF16LEToUTF8:
				466	* @out: a pointer to an array of bytes to store the result
				467	* @outlen: the length of @out
				468	* @inb: a pointer to an array of UTF-16LE passwd as a byte array
				469	* @inlenb: the length of @in in UTF-16LE chars
				470	*
				471	* Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
				472	* block of chars out. This function assume the endian properity
				473	* is the same between the native type of this machine and the
				474	* inputed one.
				475	*
				476	* Returns the number of byte written, or -1 by lack of space, or -2
				477	* if the transcoding fails (for *in is not valid utf16 string)
				478	* The value of *inlen after return is the number of octets consumed
				479	* as the return value is positive, else unpredictiable.
				480	*/
				481	int
				482	UTF16LEToUTF8(unsigned char* out, int *outlen,
				483	const unsigned char* inb, int *inlenb)
				484	{
				485	unsigned char* outstart = out;
				486	const unsigned char* processed = inb;
				487	unsigned char* outend = out + *outlen;
				488	unsigned short* in = (unsigned short*) inb;
				489	unsigned short* inend;
				490	unsigned int c, d, inlen;
				491	unsigned char *tmp;
				492	int bits;
				493
				494	if ((*inlenb % 2) == 1)
				495	(*inlenb)--;
				496	inlen = *inlenb / 2;
				497	inend = in + inlen;
				498	while ((in < inend) && (out - outstart + 5 < *outlen)) {
				499	if (xmlLittleEndian) {
				500	c= *in++;
				501	} else {
				502	tmp = (unsigned char *) in;
				503	c = *tmp++;
				504	c = c \| (((unsigned int)*tmp) << 8);
				505	in++;
				506	}
				507	if ((c & 0xFC00) == 0xD800) { /* surrogates */
				508	if (in >= inend) { /* (in > inend) shouldn't happens */
				509	break;
				510	}
				511	if (xmlLittleEndian) {
				512	d = *in++;
				513	} else {
				514	tmp = (unsigned char *) in;
				515	d = *tmp++;
				516	d = d \| (((unsigned int)*tmp) << 8);
				517	in++;
				518	}
				519	if ((d & 0xFC00) == 0xDC00) {
				520	c &= 0x03FF;
				521	c <<= 10;
				522	c \|= d & 0x03FF;
				523	c += 0x10000;
				524	}
				525	else {
				526	*outlen = out - outstart;
				527	*inlenb = processed - inb;
				528	return(-2);
				529	}
				530	}
				531
				532	/* assertion: c is a single UTF-4 value */
				533	if (out >= outend)
				534	break;
				535	if (c < 0x80) { *out++= c; bits= -6; }
				536	else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				537	else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				538	else { *out++= ((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				539
				540	for ( ; bits >= 0; bits-= 6) {
				541	if (out >= outend)
				542	break;
				543	*out++= ((c >> bits) & 0x3F) \| 0x80;
				544	}
				545	processed = (const unsigned char*) in;
				546	}
				547	*outlen = out - outstart;
				548	*inlenb = processed - inb;
				549	return(0);
				550	}
				551
				552	/**
				553	* UTF8ToUTF16LE:
				554	* @outb: a pointer to an array of bytes to store the result
				555	* @outlen: the length of @outb
				556	* @in: a pointer to an array of UTF-8 chars
				557	* @inlen: the length of @in
				558	*
				559	* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
				560	* block of chars out.
				561	*
				562	* Returns the number of byte written, or -1 by lack of space, or -2
				563	* if the transcoding failed.
				564	*/
				565	int
				566	UTF8ToUTF16LE(unsigned char* outb, int *outlen,
				567	const unsigned char* in, int *inlen)
				568	{
				569	unsigned short* out = (unsigned short*) outb;
				570	const unsigned char* processed = in;
				571	unsigned short* outstart= out;
				572	unsigned short* outend;
				573	const unsigned char* inend= in+*inlen;
				574	unsigned int c, d;
				575	int trailing;
				576	unsigned char *tmp;
				577	unsigned short tmp1, tmp2;
				578
				579	if (in == NULL) {
				580	/*
				581	* initialization, add the Byte Order Mark
				582	*/
				583	if (*outlen >= 2) {
				584	outb[0] = 0xFF;
				585	outb[1] = 0xFE;
				586	*outlen = 2;
				587	*inlen = 0;
				588	#ifdef DEBUG_ENCODING
				589	xmlGenericError(xmlGenericErrorContext,
				590	"Added FFFE Byte Order Mark\n");
				591	#endif
				592	return(2);
				593	}
				594	*outlen = 0;
				595	*inlen = 0;
				596	return(0);
				597	}
				598	outend = out + (*outlen / 2);
				599	while (in < inend) {
				600	d= *in++;
				601	if (d < 0x80) { c= d; trailing= 0; }
				602	else if (d < 0xC0) {
				603	/* trailing byte in leading position */
				604	outlen = (out - outstart) 2;
				605	*inlen = processed - in;
				606	return(-2);
				607	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				608	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				609	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				610	else {
				611	/* no chance for this in UTF-16 */
				612	outlen = (out - outstart) 2;
				613	*inlen = processed - in;
				614	return(-2);
				615	}
				616
				617	if (inend - in < trailing) {
				618	break;
				619	}
				620
				621	for ( ; trailing; trailing--) {
				622	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
				623	break;
				624	c <<= 6;
				625	c \|= d & 0x3F;
				626	}
				627
				628	/* assertion: c is a single UTF-4 value */
				629	if (c < 0x10000) {
				630	if (out >= outend)
				631	break;
				632	if (xmlLittleEndian) {
				633	*out++ = c;
				634	} else {
				635	tmp = (unsigned char *) out;
				636	*tmp = c ;
				637	*(tmp + 1) = c >> 8 ;
				638	out++;
				639	}
				640	}
				641	else if (c < 0x110000) {
				642	if (out+1 >= outend)
				643	break;
				644	c -= 0x10000;
				645	if (xmlLittleEndian) {
				646	*out++ = 0xD800 \| (c >> 10);
				647	*out++ = 0xDC00 \| (c & 0x03FF);
				648	} else {
				649	tmp1 = 0xD800 \| (c >> 10);
				650	tmp = (unsigned char *) out;
				651	*tmp = (unsigned char) tmp1;
				652	*(tmp + 1) = tmp1 >> 8;
				653	out++;
				654
				655	tmp2 = 0xDC00 \| (c & 0x03FF);
				656	tmp = (unsigned char *) out;
				657	*tmp = (unsigned char) tmp2;
				658	*(tmp + 1) = tmp2 >> 8;
				659	out++;
				660	}
				661	}
				662	else
				663	break;
				664	processed = in;
				665	}
				666	outlen = (out - outstart) 2;
				667	*inlen = processed - in;
				668	return(0);
				669	}
				670
				671	/**
				672	* UTF16BEToUTF8:
				673	* @out: a pointer to an array of bytes to store the result
				674	* @outlen: the length of @out
				675	* @inb: a pointer to an array of UTF-16 passwd as a byte array
				676	* @inlenb: the length of @in in UTF-16 chars
				677	*
				678	* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
				679	* block of chars out. This function assume the endian properity
				680	* is the same between the native type of this machine and the
				681	* inputed one.
				682	*
				683	* Returns the number of byte written, or -1 by lack of space, or -2
				684	* if the transcoding fails (for *in is not valid utf16 string)
				685	* The value of *inlen after return is the number of octets consumed
				686	* as the return value is positive, else unpredictiable.
				687	*/
				688	int
				689	UTF16BEToUTF8(unsigned char* out, int *outlen,
				690	const unsigned char* inb, int *inlenb)
				691	{
				692	unsigned char* outstart = out;
				693	const unsigned char* processed = inb;
				694	unsigned char* outend = out + *outlen;
				695	unsigned short* in = (unsigned short*) inb;
				696	unsigned short* inend;
				697	unsigned int c, d, inlen;
				698	unsigned char *tmp;
				699	int bits;
				700
				701	if ((*inlenb % 2) == 1)
				702	(*inlenb)--;
				703	inlen = *inlenb / 2;
				704	inend= in + inlen;
				705	while (in < inend) {
				706	if (xmlLittleEndian) {
				707	tmp = (unsigned char *) in;
				708	c = *tmp++;
				709	c = c << 8;
				710	c = c \| (unsigned int) *tmp;
				711	in++;
				712	} else {
				713	c= *in++;
				714	}
				715	if ((c & 0xFC00) == 0xD800) { /* surrogates */
				716	if (in >= inend) { /* (in > inend) shouldn't happens */
				717	*outlen = out - outstart;
				718	*inlenb = processed - inb;
				719	return(-2);
				720	}
				721	if (xmlLittleEndian) {
				722	tmp = (unsigned char *) in;
				723	d = *tmp++;
				724	d = d << 8;
				725	d = d \| (unsigned int) *tmp;
				726	in++;
				727	} else {
				728	d= *in++;
				729	}
				730	if ((d & 0xFC00) == 0xDC00) {
				731	c &= 0x03FF;
				732	c <<= 10;
				733	c \|= d & 0x03FF;
				734	c += 0x10000;
				735	}
				736	else {
				737	*outlen = out - outstart;
				738	*inlenb = processed - inb;
				739	return(-2);
				740	}
				741	}
				742
				743	/* assertion: c is a single UTF-4 value */
				744	if (out >= outend)
				745	break;
				746	if (c < 0x80) { *out++= c; bits= -6; }
				747	else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				748	else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				749	else { *out++= ((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				750
				751	for ( ; bits >= 0; bits-= 6) {
				752	if (out >= outend)
				753	break;
				754	*out++= ((c >> bits) & 0x3F) \| 0x80;
				755	}
				756	processed = (const unsigned char*) in;
				757	}
				758	*outlen = out - outstart;
				759	*inlenb = processed - inb;
				760	return(0);
				761	}
				762
				763	/**
				764	* UTF8ToUTF16BE:
				765	* @outb: a pointer to an array of bytes to store the result
				766	* @outlen: the length of @outb
				767	* @in: a pointer to an array of UTF-8 chars
				768	* @inlen: the length of @in
				769	*
				770	* Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
				771	* block of chars out.
				772	*
				773	* Returns the number of byte written, or -1 by lack of space, or -2
				774	* if the transcoding failed.
				775	*/
				776	int
				777	UTF8ToUTF16BE(unsigned char* outb, int *outlen,
				778	const unsigned char* in, int *inlen)
				779	{
				780	unsigned short* out = (unsigned short*) outb;
				781	const unsigned char* processed = in;
				782	unsigned short* outstart= out;
				783	unsigned short* outend;
				784	const unsigned char* inend= in+*inlen;
				785	unsigned int c, d;
				786	int trailing;
				787	unsigned char *tmp;
				788	unsigned short tmp1, tmp2;
				789
				790	if (in == NULL) {
				791	/*
				792	* initialization, add the Byte Order Mark
				793	*/
				794	if (*outlen >= 2) {
				795	outb[0] = 0xFE;
				796	outb[1] = 0xFF;
				797	*outlen = 2;
				798	*inlen = 0;
				799	#ifdef DEBUG_ENCODING
				800	xmlGenericError(xmlGenericErrorContext,
				801	"Added FEFF Byte Order Mark\n");
				802	#endif
				803	return(2);
				804	}
				805	*outlen = 0;
				806	*inlen = 0;
				807	return(0);
				808	}
				809	outend = out + (*outlen / 2);
				810	while (in < inend) {
				811	d= *in++;
				812	if (d < 0x80) { c= d; trailing= 0; }
				813	else if (d < 0xC0) {
				814	/* trailing byte in leading position */
				815	*outlen = out - outstart;
				816	*inlen = processed - in;
				817	return(-2);
				818	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				819	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				820	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				821	else {
				822	/* no chance for this in UTF-16 */
				823	*outlen = out - outstart;
				824	*inlen = processed - in;
				825	return(-2);
				826	}
				827
				828	if (inend - in < trailing) {
				829	break;
				830	}
				831
				832	for ( ; trailing; trailing--) {
				833	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80)) break;
				834	c <<= 6;
				835	c \|= d & 0x3F;
				836	}
				837
				838	/* assertion: c is a single UTF-4 value */
				839	if (c < 0x10000) {
				840	if (out >= outend) break;
				841	if (xmlLittleEndian) {
				842	tmp = (unsigned char *) out;
				843	*tmp = c >> 8;
				844	*(tmp + 1) = c;
				845	out++;
				846	} else {
				847	*out++ = c;
				848	}
				849	}
				850	else if (c < 0x110000) {
				851	if (out+1 >= outend) break;
				852	c -= 0x10000;
				853	if (xmlLittleEndian) {
				854	tmp1 = 0xD800 \| (c >> 10);
				855	tmp = (unsigned char *) out;
				856	*tmp = tmp1 >> 8;
				857	*(tmp + 1) = (unsigned char) tmp1;
				858	out++;
				859
				860	tmp2 = 0xDC00 \| (c & 0x03FF);
				861	tmp = (unsigned char *) out;
				862	*tmp = tmp2 >> 8;
				863	*(tmp + 1) = (unsigned char) tmp2;
				864	out++;
				865	} else {
				866	*out++ = 0xD800 \| (c >> 10);
				867	*out++ = 0xDC00 \| (c & 0x03FF);
				868	}
				869	}
				870	else
				871	break;
				872	processed = in;
				873	}
				874	outlen = (out - outstart) 2;
				875	*inlen = processed - in;
				876	return(0);
				877	}
				878
				879	/**
				880	* xmlDetectCharEncoding:
				881	* @in: a pointer to the first bytes of the XML entity, must be at least
				882	* 4 bytes long.
				883	* @len: pointer to the length of the buffer
				884	*
				885	* Guess the encoding of the entity using the first bytes of the entity content
				886	* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
				887	*
				888	* Returns one of the XML_CHAR_ENCODING_... values.
				889	*/
				890	xmlCharEncoding
				891	xmlDetectCharEncoding(const unsigned char* in, int len)
				892	{
				893	if (len >= 4) {
				894	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				895	(in[2] == 0x00) && (in[3] == 0x3C))
				896	return(XML_CHAR_ENCODING_UCS4BE);
				897	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
				898	(in[2] == 0x00) && (in[3] == 0x00))
				899	return(XML_CHAR_ENCODING_UCS4LE);
				900	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				901	(in[2] == 0x3C) && (in[3] == 0x00))
				902	return(XML_CHAR_ENCODING_UCS4_2143);
				903	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
				904	(in[2] == 0x00) && (in[3] == 0x00))
				905	return(XML_CHAR_ENCODING_UCS4_3412);
				906	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
				907	(in[2] == 0xA7) && (in[3] == 0x94))
				908	return(XML_CHAR_ENCODING_EBCDIC);
				909	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
				910	(in[2] == 0x78) && (in[3] == 0x6D))
				911	return(XML_CHAR_ENCODING_UTF8);
				912	}
				913	if (len >= 2) {
				914	if ((in[0] == 0xFE) && (in[1] == 0xFF))
				915	return(XML_CHAR_ENCODING_UTF16BE);
				916	if ((in[0] == 0xFF) && (in[1] == 0xFE))
				917	return(XML_CHAR_ENCODING_UTF16LE);
				918	}
				919	return(XML_CHAR_ENCODING_NONE);
				920	}
				921
				922	/**
				923	* xmlCleanupEncodingAliases:
				924	*
				925	* Unregisters all aliases
				926	*/
				927	void
				928	xmlCleanupEncodingAliases(void) {
				929	int i;
				930
				931	if (xmlCharEncodingAliases == NULL)
				932	return;
				933
				934	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				935	if (xmlCharEncodingAliases[i].name != NULL)
				936	xmlFree((char *) xmlCharEncodingAliases[i].name);
				937	if (xmlCharEncodingAliases[i].alias != NULL)
				938	xmlFree((char *) xmlCharEncodingAliases[i].alias);
				939	}
				940	xmlCharEncodingAliasesNb = 0;
				941	xmlCharEncodingAliasesMax = 0;
				942	xmlFree(xmlCharEncodingAliases);
				943	}
				944
				945	/**
				946	* xmlGetEncodingAlias:
				947	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
				948	*
				949	* Lookup an encoding name for the given alias.
				950	*
				951	* Returns NULL if not found the original name otherwise
				952	*/
				953	const char *
				954	xmlGetEncodingAlias(const char *alias) {
				955	int i;
				956	char upper[100];
				957
				958	if (alias == NULL)
				959	return(NULL);
				960
				961	if (xmlCharEncodingAliases == NULL)
				962	return(NULL);
				963
				964	for (i = 0;i < 99;i++) {
				965	upper[i] = toupper(alias[i]);
				966	if (upper[i] == 0) break;
				967	}
				968	upper[i] = 0;
				969
				970	/*
				971	* Walk down the list looking for a definition of the alias
				972	*/
				973	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				974	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
				975	return(xmlCharEncodingAliases[i].name);
				976	}
				977	}
				978	return(NULL);
				979	}
				980
				981	/**
				982	* xmlAddEncodingAlias:
				983	* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
				984	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
				985	*
				986	* Registers and alias @alias for an encoding named @name. Existing alias
				987	* will be overwritten.
				988	*
				989	* Returns 0 in case of success, -1 in case of error
				990	*/
				991	int
				992	xmlAddEncodingAlias(const char name, const char alias) {
				993	int i;
				994	char upper[100];
				995
				996	if ((name == NULL) \|\| (alias == NULL))
				997	return(-1);
				998
				999	for (i = 0;i < 99;i++) {
				1000	upper[i] = toupper(alias[i]);
				1001	if (upper[i] == 0) break;
				1002	}
				1003	upper[i] = 0;
				1004
				1005	if (xmlCharEncodingAliases == NULL) {
				1006	xmlCharEncodingAliasesNb = 0;
				1007	xmlCharEncodingAliasesMax = 20;
				1008	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
				1009	xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
				1010	if (xmlCharEncodingAliases == NULL)
				1011	return(-1);
				1012	} else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
				1013	xmlCharEncodingAliasesMax *= 2;
				1014	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
				1015	xmlRealloc(xmlCharEncodingAliases,
				1016	xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
				1017	}
				1018	/*
				1019	* Walk down the list looking for a definition of the alias
				1020	*/
				1021	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				1022	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
				1023	/*
				1024	* Replace the definition.
				1025	*/
				1026	xmlFree((char *) xmlCharEncodingAliases[i].name);
				1027	xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
				1028	return(0);
				1029	}
				1030	}
				1031	/*
				1032	* Add the definition
				1033	*/
				1034	xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
				1035	xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
				1036	xmlCharEncodingAliasesNb++;
				1037	return(0);
				1038	}
				1039
				1040	/**
				1041	* xmlDelEncodingAlias:
				1042	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
				1043	*
				1044	* Unregisters an encoding alias @alias
				1045	*
				1046	* Returns 0 in case of success, -1 in case of error
				1047	*/
				1048	int
				1049	xmlDelEncodingAlias(const char *alias) {
				1050	int i;
				1051
				1052	if (alias == NULL)
				1053	return(-1);
				1054
				1055	if (xmlCharEncodingAliases == NULL)
				1056	return(-1);
				1057	/*
				1058	* Walk down the list looking for a definition of the alias
				1059	*/
				1060	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				1061	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
				1062	xmlFree((char *) xmlCharEncodingAliases[i].name);
				1063	xmlFree((char *) xmlCharEncodingAliases[i].alias);
				1064	xmlCharEncodingAliasesNb--;
				1065	memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
				1066	sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
				1067	return(0);
				1068	}
				1069	}
				1070	return(-1);
				1071	}
				1072
				1073	/**
				1074	* xmlParseCharEncoding:
				1075	* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
				1076	*
				1077	* Conpare the string to the known encoding schemes already known. Note
				1078	* that the comparison is case insensitive accordingly to the section
				1079	* [XML] 4.3.3 Character Encoding in Entities.
				1080	*
				1081	* Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
				1082	* if not recognized.
				1083	*/
				1084	xmlCharEncoding
				1085	xmlParseCharEncoding(const char* name)
				1086	{
				1087	const char *alias;
				1088	char upper[500];
				1089	int i;
				1090
				1091	if (name == NULL)
				1092	return(XML_CHAR_ENCODING_NONE);
				1093
				1094	/*
				1095	* Do the alias resolution
				1096	*/
				1097	alias = xmlGetEncodingAlias(name);
				1098	if (alias != NULL)
				1099	name = alias;
				1100
				1101	for (i = 0;i < 499;i++) {
				1102	upper[i] = toupper(name[i]);
				1103	if (upper[i] == 0) break;
				1104	}
				1105	upper[i] = 0;
				1106
				1107	if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
				1108	if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
				1109	if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
				1110
				1111	/*
				1112	* NOTE: if we were able to parse this, the endianness of UTF16 is
				1113	* already found and in use
				1114	*/
				1115	if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
				1116	if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
				1117
				1118	if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				1119	if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				1120	if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
				1121
				1122	/*
				1123	* NOTE: if we were able to parse this, the endianness of UCS4 is
				1124	* already found and in use
				1125	*/
				1126	if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				1127	if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				1128	if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
				1129
				1130
				1131	if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
				1132	if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
				1133	if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
				1134
				1135	if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
				1136	if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
				1137	if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
				1138
				1139	if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
				1140	if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
				1141	if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
				1142	if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
				1143	if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
				1144	if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
				1145	if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
				1146
				1147	if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
				1148	if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
				1149	if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
				1150
				1151	#ifdef DEBUG_ENCODING
				1152	xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
				1153	#endif
				1154	return(XML_CHAR_ENCODING_ERROR);
				1155	}
				1156
				1157	/**
				1158	* xmlGetCharEncodingName:
				1159	* @enc: the encoding
				1160	*
				1161	* The "canonical" name for XML encoding.
				1162	* C.f. http://www.w3.org/TR/REC-xml#charencoding
				1163	* Section 4.3.3 Character Encoding in Entities
				1164	*
				1165	* Returns the canonical name for the given encoding
				1166	*/
				1167
				1168	const char*
				1169	xmlGetCharEncodingName(xmlCharEncoding enc) {
				1170	switch (enc) {
				1171	case XML_CHAR_ENCODING_ERROR:
				1172	return(NULL);
				1173	case XML_CHAR_ENCODING_NONE:
				1174	return(NULL);
				1175	case XML_CHAR_ENCODING_UTF8:
				1176	return("UTF-8");
				1177	case XML_CHAR_ENCODING_UTF16LE:
				1178	return("UTF-16");
				1179	case XML_CHAR_ENCODING_UTF16BE:
				1180	return("UTF-16");
				1181	case XML_CHAR_ENCODING_EBCDIC:
				1182	return("EBCDIC");
				1183	case XML_CHAR_ENCODING_UCS4LE:
				1184	return("ISO-10646-UCS-4");
				1185	case XML_CHAR_ENCODING_UCS4BE:
				1186	return("ISO-10646-UCS-4");
				1187	case XML_CHAR_ENCODING_UCS4_2143:
				1188	return("ISO-10646-UCS-4");
				1189	case XML_CHAR_ENCODING_UCS4_3412:
				1190	return("ISO-10646-UCS-4");
				1191	case XML_CHAR_ENCODING_UCS2:
				1192	return("ISO-10646-UCS-2");
				1193	case XML_CHAR_ENCODING_8859_1:
				1194	return("ISO-8859-1");
				1195	case XML_CHAR_ENCODING_8859_2:
				1196	return("ISO-8859-2");
				1197	case XML_CHAR_ENCODING_8859_3:
				1198	return("ISO-8859-3");
				1199	case XML_CHAR_ENCODING_8859_4:
				1200	return("ISO-8859-4");
				1201	case XML_CHAR_ENCODING_8859_5:
				1202	return("ISO-8859-5");
				1203	case XML_CHAR_ENCODING_8859_6:
				1204	return("ISO-8859-6");
				1205	case XML_CHAR_ENCODING_8859_7:
				1206	return("ISO-8859-7");
				1207	case XML_CHAR_ENCODING_8859_8:
				1208	return("ISO-8859-8");
				1209	case XML_CHAR_ENCODING_8859_9:
				1210	return("ISO-8859-9");
				1211	case XML_CHAR_ENCODING_2022_JP:
				1212	return("ISO-2022-JP");
				1213	case XML_CHAR_ENCODING_SHIFT_JIS:
				1214	return("Shift-JIS");
				1215	case XML_CHAR_ENCODING_EUC_JP:
				1216	return("EUC-JP");
				1217	case XML_CHAR_ENCODING_ASCII:
				1218	return(NULL);
				1219	}
				1220	return(NULL);
				1221	}
				1222
				1223	/****************************************************************
				1224	* *
				1225	* Char encoding handlers *
				1226	* *
				1227	****************************************************************/
				1228
				1229	/* the size should be growable, but it's not a big deal ... */
				1230	#define MAX_ENCODING_HANDLERS 50
				1231	static xmlCharEncodingHandlerPtr *handlers = NULL;
				1232	static int nbCharEncodingHandler = 0;
				1233
				1234	/*
				1235	* The default is UTF-8 for XML, that's also the default used for the
				1236	* parser internals, so the default encoding handler is NULL
				1237	*/
				1238
				1239	static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
				1240
				1241	/**
				1242	* xmlNewCharEncodingHandler:
				1243	* @name: the encoding name, in UTF-8 format (ASCII actually)
				1244	* @input: the xmlCharEncodingInputFunc to read that encoding
				1245	* @output: the xmlCharEncodingOutputFunc to write that encoding
				1246	*
				1247	* Create and registers an xmlCharEncodingHandler.
				1248	* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
				1249	*/
				1250	xmlCharEncodingHandlerPtr
				1251	xmlNewCharEncodingHandler(const char *name,
				1252	xmlCharEncodingInputFunc input,
				1253	xmlCharEncodingOutputFunc output) {
				1254	xmlCharEncodingHandlerPtr handler;
				1255	const char *alias;
				1256	char upper[500];
				1257	int i;
				1258	char *up = 0;
				1259
				1260	/*
				1261	* Do the alias resolution
				1262	*/
				1263	alias = xmlGetEncodingAlias(name);
				1264	if (alias != NULL)
				1265	name = alias;
				1266
				1267	/*
				1268	* Keep only the uppercase version of the encoding.
				1269	*/
				1270	if (name == NULL) {
				1271	xmlGenericError(xmlGenericErrorContext,
				1272	"xmlNewCharEncodingHandler : no name !\n");
				1273	return(NULL);
				1274	}
				1275	for (i = 0;i < 499;i++) {
				1276	upper[i] = toupper(name[i]);
				1277	if (upper[i] == 0) break;
				1278	}
				1279	upper[i] = 0;
				1280	up = xmlMemStrdup(upper);
				1281	if (up == NULL) {
				1282	xmlGenericError(xmlGenericErrorContext,
				1283	"xmlNewCharEncodingHandler : out of memory !\n");
				1284	return(NULL);
				1285	}
				1286
				1287	/*
				1288	* allocate and fill-up an handler block.
				1289	*/
				1290	handler = (xmlCharEncodingHandlerPtr)
				1291	xmlMalloc(sizeof(xmlCharEncodingHandler));
				1292	if (handler == NULL) {
				1293	xmlGenericError(xmlGenericErrorContext,
				1294	"xmlNewCharEncodingHandler : out of memory !\n");
				1295	return(NULL);
				1296	}
				1297	handler->input = input;
				1298	handler->output = output;
				1299	handler->name = up;
				1300
				1301	#ifdef LIBXML_ICONV_ENABLED
				1302	handler->iconv_in = NULL;
				1303	handler->iconv_out = NULL;
				1304	#endif /* LIBXML_ICONV_ENABLED */
				1305
				1306	/*
				1307	* registers and returns the handler.
				1308	*/
				1309	xmlRegisterCharEncodingHandler(handler);
				1310	#ifdef DEBUG_ENCODING
				1311	xmlGenericError(xmlGenericErrorContext,
				1312	"Registered encoding handler for %s\n", name);
				1313	#endif
				1314	return(handler);
				1315	}
				1316
				1317	/**
				1318	* xmlInitCharEncodingHandlers:
				1319	*
				1320	* Initialize the char encoding support, it registers the default
				1321	* encoding supported.
				1322	* NOTE: while public, this function usually doesn't need to be called
				1323	* in normal processing.
				1324	*/
				1325	void
				1326	xmlInitCharEncodingHandlers(void) {
				1327	unsigned short int tst = 0x1234;
				1328	unsigned char ptr = (unsigned char ) &tst;
				1329
				1330	if (handlers != NULL) return;
				1331
				1332	handlers = (xmlCharEncodingHandlerPtr *)
				1333	xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
				1334
				1335	if (*ptr == 0x12) xmlLittleEndian = 0;
				1336	else if (*ptr == 0x34) xmlLittleEndian = 1;
				1337	else xmlGenericError(xmlGenericErrorContext,
				1338	"Odd problem at endianness detection\n");
				1339
				1340	if (handlers == NULL) {
				1341	xmlGenericError(xmlGenericErrorContext,
				1342	"xmlInitCharEncodingHandlers : out of memory !\n");
				1343	return;
				1344	}
				1345	xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
				1346	xmlUTF16LEHandler =
				1347	xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
				1348	xmlUTF16BEHandler =
				1349	xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
				1350	xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
				1351	xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
				1352	#ifdef LIBXML_HTML_ENABLED
				1353	xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
				1354	#endif
				1355	}
				1356
				1357	/**
				1358	* xmlCleanupCharEncodingHandlers:
				1359	*
				1360	* Cleanup the memory allocated for the char encoding support, it
				1361	* unregisters all the encoding handlers and the aliases.
				1362	*/
				1363	void
				1364	xmlCleanupCharEncodingHandlers(void) {
				1365	xmlCleanupEncodingAliases();
				1366
				1367	if (handlers == NULL) return;
				1368
				1369	for (;nbCharEncodingHandler > 0;) {
				1370	nbCharEncodingHandler--;
				1371	if (handlers[nbCharEncodingHandler] != NULL) {
				1372	if (handlers[nbCharEncodingHandler]->name != NULL)
				1373	xmlFree(handlers[nbCharEncodingHandler]->name);
				1374	xmlFree(handlers[nbCharEncodingHandler]);
				1375	}
				1376	}
				1377	xmlFree(handlers);
				1378	handlers = NULL;
				1379	nbCharEncodingHandler = 0;
				1380	xmlDefaultCharEncodingHandler = NULL;
				1381	}
				1382
				1383	/**
				1384	* xmlRegisterCharEncodingHandler:
				1385	* @handler: the xmlCharEncodingHandlerPtr handler block
				1386	*
				1387	* Register the char encoding handler, surprizing, isn't it ?
				1388	*/
				1389	void
				1390	xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
				1391	if (handlers == NULL) xmlInitCharEncodingHandlers();
				1392	if (handler == NULL) {
				1393	xmlGenericError(xmlGenericErrorContext,
				1394	"xmlRegisterCharEncodingHandler: NULL handler !\n");
				1395	return;
				1396	}
				1397
				1398	if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
				1399	xmlGenericError(xmlGenericErrorContext,
				1400	"xmlRegisterCharEncodingHandler: Too many handler registered\n");
				1401	xmlGenericError(xmlGenericErrorContext,
				1402	"\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
				1403	return;
				1404	}
				1405	handlers[nbCharEncodingHandler++] = handler;
				1406	}
				1407
				1408	/**
				1409	* xmlGetCharEncodingHandler:
				1410	* @enc: an xmlCharEncoding value.
				1411	*
				1412	* Search in the registrered set the handler able to read/write that encoding.
				1413	*
				1414	* Returns the handler or NULL if not found
				1415	*/
				1416	xmlCharEncodingHandlerPtr
				1417	xmlGetCharEncodingHandler(xmlCharEncoding enc) {
				1418	xmlCharEncodingHandlerPtr handler;
				1419
				1420	if (handlers == NULL) xmlInitCharEncodingHandlers();
				1421	switch (enc) {
				1422	case XML_CHAR_ENCODING_ERROR:
				1423	return(NULL);
				1424	case XML_CHAR_ENCODING_NONE:
				1425	return(NULL);
				1426	case XML_CHAR_ENCODING_UTF8:
				1427	return(NULL);
				1428	case XML_CHAR_ENCODING_UTF16LE:
				1429	return(xmlUTF16LEHandler);
				1430	case XML_CHAR_ENCODING_UTF16BE:
				1431	return(xmlUTF16BEHandler);
				1432	case XML_CHAR_ENCODING_EBCDIC:
				1433	handler = xmlFindCharEncodingHandler("EBCDIC");
				1434	if (handler != NULL) return(handler);
				1435	handler = xmlFindCharEncodingHandler("ebcdic");
				1436	if (handler != NULL) return(handler);
				1437	break;
				1438	case XML_CHAR_ENCODING_UCS4BE:
				1439	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
				1440	if (handler != NULL) return(handler);
				1441	handler = xmlFindCharEncodingHandler("UCS-4");
				1442	if (handler != NULL) return(handler);
				1443	handler = xmlFindCharEncodingHandler("UCS4");
				1444	if (handler != NULL) return(handler);
				1445	break;
				1446	case XML_CHAR_ENCODING_UCS4LE:
				1447	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
				1448	if (handler != NULL) return(handler);
				1449	handler = xmlFindCharEncodingHandler("UCS-4");
				1450	if (handler != NULL) return(handler);
				1451	handler = xmlFindCharEncodingHandler("UCS4");
				1452	if (handler != NULL) return(handler);
				1453	break;
				1454	case XML_CHAR_ENCODING_UCS4_2143:
				1455	break;
				1456	case XML_CHAR_ENCODING_UCS4_3412:
				1457	break;
				1458	case XML_CHAR_ENCODING_UCS2:
				1459	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
				1460	if (handler != NULL) return(handler);
				1461	handler = xmlFindCharEncodingHandler("UCS-2");
				1462	if (handler != NULL) return(handler);
				1463	handler = xmlFindCharEncodingHandler("UCS2");
				1464	if (handler != NULL) return(handler);
				1465	break;
				1466
				1467	/*
				1468	* We used to keep ISO Latin encodings native in the
				1469	* generated data. This led to so many problems that
				1470	* this has been removed. One can still change this
				1471	* back by registering no-ops encoders for those
				1472	*/
				1473	case XML_CHAR_ENCODING_8859_1:
				1474	handler = xmlFindCharEncodingHandler("ISO-8859-1");
				1475	if (handler != NULL) return(handler);
				1476	break;
				1477	case XML_CHAR_ENCODING_8859_2:
				1478	handler = xmlFindCharEncodingHandler("ISO-8859-2");
				1479	if (handler != NULL) return(handler);
				1480	break;
				1481	case XML_CHAR_ENCODING_8859_3:
				1482	handler = xmlFindCharEncodingHandler("ISO-8859-3");
				1483	if (handler != NULL) return(handler);
				1484	break;
				1485	case XML_CHAR_ENCODING_8859_4:
				1486	handler = xmlFindCharEncodingHandler("ISO-8859-4");
				1487	if (handler != NULL) return(handler);
				1488	break;
				1489	case XML_CHAR_ENCODING_8859_5:
				1490	handler = xmlFindCharEncodingHandler("ISO-8859-5");
				1491	if (handler != NULL) return(handler);
				1492	break;
				1493	case XML_CHAR_ENCODING_8859_6:
				1494	handler = xmlFindCharEncodingHandler("ISO-8859-6");
				1495	if (handler != NULL) return(handler);
				1496	break;
				1497	case XML_CHAR_ENCODING_8859_7:
				1498	handler = xmlFindCharEncodingHandler("ISO-8859-7");
				1499	if (handler != NULL) return(handler);
				1500	break;
				1501	case XML_CHAR_ENCODING_8859_8:
				1502	handler = xmlFindCharEncodingHandler("ISO-8859-8");
				1503	if (handler != NULL) return(handler);
				1504	break;
				1505	case XML_CHAR_ENCODING_8859_9:
				1506	handler = xmlFindCharEncodingHandler("ISO-8859-9");
				1507	if (handler != NULL) return(handler);
				1508	break;
				1509
				1510
				1511	case XML_CHAR_ENCODING_2022_JP:
				1512	handler = xmlFindCharEncodingHandler("ISO-2022-JP");
				1513	if (handler != NULL) return(handler);
				1514	break;
				1515	case XML_CHAR_ENCODING_SHIFT_JIS:
				1516	handler = xmlFindCharEncodingHandler("SHIFT-JIS");
				1517	if (handler != NULL) return(handler);
				1518	handler = xmlFindCharEncodingHandler("SHIFT_JIS");
				1519	if (handler != NULL) return(handler);
				1520	handler = xmlFindCharEncodingHandler("Shift_JIS");
				1521	if (handler != NULL) return(handler);
				1522	break;
				1523	case XML_CHAR_ENCODING_EUC_JP:
				1524	handler = xmlFindCharEncodingHandler("EUC-JP");
				1525	if (handler != NULL) return(handler);
				1526	break;
				1527	default:
				1528	break;
				1529	}
				1530
				1531	#ifdef DEBUG_ENCODING
				1532	xmlGenericError(xmlGenericErrorContext,
				1533	"No handler found for encoding %d\n", enc);
				1534	#endif
				1535	return(NULL);
				1536	}
				1537
				1538	/**
				1539	* xmlGetCharEncodingHandler:
				1540	* @enc: a string describing the char encoding.
				1541	*
				1542	* Search in the registrered set the handler able to read/write that encoding.
				1543	*
				1544	* Returns the handler or NULL if not found
				1545	*/
				1546	xmlCharEncodingHandlerPtr
				1547	xmlFindCharEncodingHandler(const char *name) {
				1548	const char *nalias;
				1549	const char *norig;
				1550	xmlCharEncoding alias;
				1551	#ifdef LIBXML_ICONV_ENABLED
				1552	xmlCharEncodingHandlerPtr enc;
				1553	iconv_t icv_in, icv_out;
				1554	#endif /* LIBXML_ICONV_ENABLED */
				1555	char upper[100];
				1556	int i;
				1557
				1558	if (handlers == NULL) xmlInitCharEncodingHandlers();
				1559	if (name == NULL) return(xmlDefaultCharEncodingHandler);
				1560	if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
				1561
				1562	/*
				1563	* Do the alias resolution
				1564	*/
				1565	norig = name;
				1566	nalias = xmlGetEncodingAlias(name);
				1567	if (nalias != NULL)
				1568	name = nalias;
				1569
				1570	/*
				1571	* Check first for directly registered encoding names
				1572	*/
				1573	for (i = 0;i < 99;i++) {
				1574	upper[i] = toupper(name[i]);
				1575	if (upper[i] == 0) break;
				1576	}
				1577	upper[i] = 0;
				1578
				1579	for (i = 0;i < nbCharEncodingHandler; i++)
				1580	if (!strcmp(upper, handlers[i]->name)) {
				1581	#ifdef DEBUG_ENCODING
				1582	xmlGenericError(xmlGenericErrorContext,
				1583	"Found registered handler for encoding %s\n", name);
				1584	#endif
				1585	return(handlers[i]);
				1586	}
				1587
				1588	#ifdef LIBXML_ICONV_ENABLED
				1589	/* check whether iconv can handle this */
				1590	icv_in = iconv_open("UTF-8", name);
				1591	icv_out = iconv_open(name, "UTF-8");
				1592	if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
				1593	enc = (xmlCharEncodingHandlerPtr)
				1594	xmlMalloc(sizeof(xmlCharEncodingHandler));
				1595	if (enc == NULL) {
				1596	iconv_close(icv_in);
				1597	iconv_close(icv_out);
				1598	return(NULL);
				1599	}
				1600	enc->name = xmlMemStrdup(name);
				1601	enc->input = NULL;
				1602	enc->output = NULL;
				1603	enc->iconv_in = icv_in;
				1604	enc->iconv_out = icv_out;
				1605	#ifdef DEBUG_ENCODING
				1606	xmlGenericError(xmlGenericErrorContext,
				1607	"Found iconv handler for encoding %s\n", name);
				1608	#endif
				1609	return enc;
				1610	} else if ((icv_in != (iconv_t) -1) \|\| icv_out != (iconv_t) -1) {
				1611	xmlGenericError(xmlGenericErrorContext,
				1612	"iconv : problems with filters for '%s'\n", name);
				1613	}
				1614	#endif /* LIBXML_ICONV_ENABLED */
				1615
				1616	#ifdef DEBUG_ENCODING
				1617	xmlGenericError(xmlGenericErrorContext,
				1618	"No handler found for encoding %s\n", name);
				1619	#endif
				1620
				1621	/*
				1622	* Fallback using the canonical names
				1623	*/
				1624	alias = xmlParseCharEncoding(norig);
				1625	if (alias != XML_CHAR_ENCODING_ERROR) {
				1626	const char* canon;
				1627	canon = xmlGetCharEncodingName(alias);
				1628	if ((canon != NULL) && (strcmp(name, canon))) {
				1629	return(xmlFindCharEncodingHandler(canon));
				1630	}
				1631	}
				1632
				1633	return(NULL);
				1634	}
				1635
				1636	#ifdef LIBXML_ICONV_ENABLED
				1637	/**
				1638	* xmlIconvWrapper:
				1639	* @cd: iconv converter data structure
				1640	* @out: a pointer to an array of bytes to store the result
				1641	* @outlen: the length of @out
				1642	* @in: a pointer to an array of ISO Latin 1 chars
				1643	* @inlen: the length of @in
				1644	*
				1645	* Returns 0 if success, or
				1646	* -1 by lack of space, or
				1647	* -2 if the transcoding fails (for *in is not valid utf8 string or
				1648	* the result of transformation can't fit into the encoding we want), or
				1649	* -3 if there the last byte can't form a single output char.
				1650	*
				1651	* The value of @inlen after return is the number of octets consumed
				1652	* as the return value is positive, else unpredictiable.
				1653	* The value of @outlen after return is the number of ocetes consumed.
				1654	*/
				1655	static int
				1656	xmlIconvWrapper(iconv_t cd,
				1657	unsigned char out, int outlen,
				1658	const unsigned char in, int inlen) {
				1659
				1660	size_t icv_inlen = inlen, icv_outlen = outlen;
				1661	const char icv_in = (const char ) in;
				1662	char icv_out = (char ) out;
				1663	int ret;
				1664
				1665	ret = iconv(cd,
				1666	&icv_in, &icv_inlen,
				1667	&icv_out, &icv_outlen);
				1668	if (in != NULL) {
				1669	*inlen -= icv_inlen;
				1670	*outlen -= icv_outlen;
				1671	} else {
				1672	*inlen = 0;
				1673	*outlen = 0;
				1674	}
				1675	if (icv_inlen != 0 \|\| ret == (size_t) -1) {
				1676	#ifdef EILSEQ
				1677	if (errno == EILSEQ) {
				1678	return -2;
				1679	} else
				1680	#endif
				1681	#ifdef E2BIG
				1682	if (errno == E2BIG) {
				1683	return -1;
				1684	} else
				1685	#endif
				1686	#ifdef EINVAL
				1687	if (errno == EINVAL) {
				1688	return -3;
				1689	} else
				1690	#endif
				1691	{
				1692	return -3;
				1693	}
				1694	}
				1695	return 0;
				1696	}
				1697	#endif /* LIBXML_ICONV_ENABLED */
				1698
				1699	/**
				1700	* xmlCharEncFirstLine:
				1701	* @handler: char enconding transformation data structure
				1702	* @out: an xmlBuffer for the output.
				1703	* @in: an xmlBuffer for the input
				1704	*
				1705	* Front-end for the encoding handler input function, but handle only
				1706	* the very first line, i.e. limit itself to 45 chars.
				1707	*
				1708	* Returns the number of byte written if success, or
				1709	* -1 general error
				1710	* -2 if the transcoding fails (for *in is not valid utf8 string or
				1711	* the result of transformation can't fit into the encoding we want), or
				1712	*/
				1713	int
				1714	xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
				1715	xmlBufferPtr in) {
				1716	int ret = -2;
				1717	int written;
				1718	int toconv;
				1719
				1720	if (handler == NULL) return(-1);
				1721	if (out == NULL) return(-1);
				1722	if (in == NULL) return(-1);
				1723
				1724	written = out->size - out->use;
				1725	toconv = in->use;
				1726	if (toconv * 2 >= written) {
				1727	xmlBufferGrow(out, toconv);
				1728	written = out->size - out->use - 1;
				1729	}
				1730
				1731	/*
				1732	* echo '<?xml version="1.0" encoding="UCS4"?>' \| wc -c => 38
				1733	* 45 chars should be sufficient to reach the end of the encoding
				1734	* decalration without going too far inside the document content.
				1735	*/
				1736	written = 45;
				1737
				1738	if (handler->input != NULL) {
				1739	ret = handler->input(&out->content[out->use], &written,
				1740	in->content, &toconv);
				1741	xmlBufferShrink(in, toconv);
				1742	out->use += written;
				1743	out->content[out->use] = 0;
				1744	}
				1745	#ifdef LIBXML_ICONV_ENABLED
				1746	else if (handler->iconv_in != NULL) {
				1747	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
				1748	&written, in->content, &toconv);
				1749	xmlBufferShrink(in, toconv);
				1750	out->use += written;
				1751	out->content[out->use] = 0;
				1752	if (ret == -1) ret = -3;
				1753	}
				1754	#endif /* LIBXML_ICONV_ENABLED */
				1755	#ifdef DEBUG_ENCODING
				1756	switch (ret) {
				1757	case 0:
				1758	xmlGenericError(xmlGenericErrorContext,
				1759	"converted %d bytes to %d bytes of input\n",
				1760	toconv, written);
				1761	break;
				1762	case -1:
				1763	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
				1764	toconv, written, in->use);
				1765	break;
				1766	case -2:
				1767	xmlGenericError(xmlGenericErrorContext,
				1768	"input conversion failed due to input error\n");
				1769	break;
				1770	case -3:
				1771	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
				1772	toconv, written, in->use);
				1773	break;
				1774	default:
				1775	xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
				1776	}
				1777	#endif
				1778	/*
				1779	* Ignore when input buffer is not on a boundary
				1780	*/
				1781	if (ret == -3) ret = 0;
				1782	if (ret == -1) ret = 0;
				1783	return(ret);
				1784	}
				1785
				1786	/**
				1787	* xmlCharEncInFunc:
				1788	* @handler: char enconding transformation data structure
				1789	* @out: an xmlBuffer for the output.
				1790	* @in: an xmlBuffer for the input
				1791	*
				1792	* Generic front-end for the encoding handler input function
				1793	*
				1794	* Returns the number of byte written if success, or
				1795	* -1 general error
				1796	* -2 if the transcoding fails (for *in is not valid utf8 string or
				1797	* the result of transformation can't fit into the encoding we want), or
				1798	*/
				1799	int
				1800	xmlCharEncInFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
				1801	xmlBufferPtr in) {
				1802	int ret = -2;
				1803	int written;
				1804	int toconv;
				1805
				1806	if (handler == NULL) return(-1);
				1807	if (out == NULL) return(-1);
				1808	if (in == NULL) return(-1);
				1809
				1810	toconv = in->use;
				1811	if (toconv == 0)
				1812	return(0);
				1813	written = out->size - out->use;
				1814	if (toconv * 2 >= written) {
				1815	xmlBufferGrow(out, out->size + toconv * 2);
				1816	written = out->size - out->use - 1;
				1817	}
				1818	if (handler->input != NULL) {
				1819	ret = handler->input(&out->content[out->use], &written,
				1820	in->content, &toconv);
				1821	xmlBufferShrink(in, toconv);
				1822	out->use += written;
				1823	out->content[out->use] = 0;
				1824	}
				1825	#ifdef LIBXML_ICONV_ENABLED
				1826	else if (handler->iconv_in != NULL) {
				1827	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
				1828	&written, in->content, &toconv);
				1829	xmlBufferShrink(in, toconv);
				1830	out->use += written;
				1831	out->content[out->use] = 0;
				1832	if (ret == -1) ret = -3;
				1833	}
				1834	#endif /* LIBXML_ICONV_ENABLED */
				1835	switch (ret) {
				1836	#ifdef DEBUG_ENCODING
				1837	case 0:
				1838	xmlGenericError(xmlGenericErrorContext,
				1839	"converted %d bytes to %d bytes of input\n",
				1840	toconv, written);
				1841	break;
				1842	case -1:
				1843	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
				1844	toconv, written, in->use);
				1845	break;
				1846	case -3:
				1847	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
				1848	toconv, written, in->use);
				1849	break;
				1850	#endif
				1851	case -2:
				1852	xmlGenericError(xmlGenericErrorContext,
				1853	"input conversion failed due to input error\n");
				1854	xmlGenericError(xmlGenericErrorContext,
				1855	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				1856	in->content[0], in->content[1],
				1857	in->content[2], in->content[3]);
				1858	}
				1859	/*
				1860	* Ignore when input buffer is not on a boundary
				1861	*/
				1862	if (ret == -3) ret = 0;
				1863	return(ret);
				1864	}
				1865
				1866	/**
				1867	* xmlCharEncOutFunc:
				1868	* @handler: char enconding transformation data structure
				1869	* @out: an xmlBuffer for the output.
				1870	* @in: an xmlBuffer for the input
				1871	*
				1872	* Generic front-end for the encoding handler output function
				1873	* a first call with @in == NULL has to be made firs to initiate the
				1874	* output in case of non-stateless encoding needing to initiate their
				1875	* state or the output (like the BOM in UTF16).
				1876	* In case of UTF8 sequence conversion errors for the given encoder,
				1877	* the content will be automatically remapped to a CharRef sequence.
				1878	*
				1879	* Returns the number of byte written if success, or
				1880	* -1 general error
				1881	* -2 if the transcoding fails (for *in is not valid utf8 string or
				1882	* the result of transformation can't fit into the encoding we want), or
				1883	*/
				1884	int
				1885	xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
				1886	xmlBufferPtr in) {
				1887	int ret = -2;
				1888	int written;
				1889	int writtentot = 0;
				1890	int toconv;
				1891	int output = 0;
				1892
				1893	if (handler == NULL) return(-1);
				1894	if (out == NULL) return(-1);
				1895
				1896	retry:
				1897
				1898	written = out->size - out->use;
				1899
				1900	/*
				1901	* First specific handling of in = NULL, i.e. the initialization call
				1902	*/
				1903	if (in == NULL) {
				1904	toconv = 0;
				1905	if (handler->output != NULL) {
				1906	ret = handler->output(&out->content[out->use], &written,
				1907	NULL, &toconv);
				1908	out->use += written;
				1909	out->content[out->use] = 0;
				1910	}
				1911	#ifdef LIBXML_ICONV_ENABLED
				1912	else if (handler->iconv_out != NULL) {
				1913	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
				1914	&written, NULL, &toconv);
				1915	out->use += written;
				1916	out->content[out->use] = 0;
				1917	}
				1918	#endif /* LIBXML_ICONV_ENABLED */
				1919	#ifdef DEBUG_ENCODING
				1920	xmlGenericError(xmlGenericErrorContext,
				1921	"initialized encoder\n");
				1922	#endif
				1923	return(0);
				1924	}
				1925
				1926	/*
				1927	* Convertion itself.
				1928	*/
				1929	toconv = in->use;
				1930	if (toconv == 0)
				1931	return(0);
				1932	if (toconv * 2 >= written) {
				1933	xmlBufferGrow(out, toconv * 2);
				1934	written = out->size - out->use - 1;
				1935	}
				1936	if (handler->output != NULL) {
				1937	ret = handler->output(&out->content[out->use], &written,
				1938	in->content, &toconv);
				1939	xmlBufferShrink(in, toconv);
				1940	out->use += written;
				1941	writtentot += written;
				1942	out->content[out->use] = 0;
				1943	}
				1944	#ifdef LIBXML_ICONV_ENABLED
				1945	else if (handler->iconv_out != NULL) {
				1946	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
				1947	&written, in->content, &toconv);
				1948	xmlBufferShrink(in, toconv);
				1949	out->use += written;
				1950	writtentot += written;
				1951	out->content[out->use] = 0;
				1952	if (ret == -1) {
				1953	if (written > 0) {
				1954	/*
				1955	* Can be a limitation of iconv
				1956	*/
				1957	goto retry;
				1958	}
				1959	ret = -3;
				1960	}
				1961	}
				1962	#endif /* LIBXML_ICONV_ENABLED */
				1963	else {
				1964	xmlGenericError(xmlGenericErrorContext,
				1965	"xmlCharEncOutFunc: no output function !\n");
				1966	return(-1);
				1967	}
				1968
				1969	if (ret >= 0) output += ret;
				1970
				1971	/*
				1972	* Attempt to handle error cases
				1973	*/
				1974	switch (ret) {
				1975	#ifdef DEBUG_ENCODING
				1976	case 0:
				1977	xmlGenericError(xmlGenericErrorContext,
				1978	"converted %d bytes to %d bytes of output\n",
				1979	toconv, written);
				1980	break;
				1981	case -1:
				1982	xmlGenericError(xmlGenericErrorContext,
				1983	"output conversion failed by lack of space\n");
				1984	break;
				1985	#endif
				1986	case -3:
				1987	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
				1988	toconv, written, in->use);
				1989	break;
				1990	case -2: {
				1991	int len = in->use;
				1992	const xmlChar utf = (const xmlChar ) in->content;
				1993	int cur;
				1994
				1995	cur = xmlGetUTF8Char(utf, &len);
				1996	if (cur > 0) {
				1997	xmlChar charref[20];
				1998
				1999	#ifdef DEBUG_ENCODING
				2000	xmlGenericError(xmlGenericErrorContext,
				2001	"handling output conversion error\n");
				2002	xmlGenericError(xmlGenericErrorContext,
				2003	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				2004	in->content[0], in->content[1],
				2005	in->content[2], in->content[3]);
				2006	#endif
				2007	/*
				2008	* Removes the UTF8 sequence, and replace it by a charref
				2009	* and continue the transcoding phase, hoping the error
				2010	* did not mangle the encoder state.
				2011	*/
				2012	sprintf((char *) charref, "&#x%X;", cur);
				2013	xmlBufferShrink(in, len);
				2014	xmlBufferAddHead(in, charref, -1);
				2015
				2016	goto retry;
				2017	} else {
				2018	xmlGenericError(xmlGenericErrorContext,
				2019	"output conversion failed due to conv error\n");
				2020	xmlGenericError(xmlGenericErrorContext,
				2021	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				2022	in->content[0], in->content[1],
				2023	in->content[2], in->content[3]);
				2024	in->content[0] = ' ';
				2025	}
				2026	break;
				2027	}
				2028	}
				2029	return(ret);
				2030	}
				2031
				2032	/**
				2033	* xmlCharEncCloseFunc:
				2034	* @handler: char enconding transformation data structure
				2035	*
				2036	* Generic front-end for hencoding handler close function
				2037	*
				2038	* Returns 0 if success, or -1 in case of error
				2039	*/
				2040	int
				2041	xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
				2042	int ret = 0;
				2043	if (handler == NULL) return(-1);
				2044	if (handler->name == NULL) return(-1);
				2045	#ifdef LIBXML_ICONV_ENABLED
				2046	/*
				2047	* Iconv handlers can be oused only once, free the whole block.
				2048	* and the associated icon resources.
				2049	*/
				2050	if ((handler->iconv_out != NULL) \|\| (handler->iconv_in != NULL)) {
				2051	if (handler->name != NULL)
				2052	xmlFree(handler->name);
				2053	handler->name = NULL;
				2054	if (handler->iconv_out != NULL) {
				2055	if (iconv_close(handler->iconv_out))
				2056	ret = -1;
				2057	handler->iconv_out = NULL;
				2058	}
				2059	if (handler->iconv_in != NULL) {
				2060	if (iconv_close(handler->iconv_in))
				2061	ret = -1;
				2062	handler->iconv_in = NULL;
				2063	}
				2064	xmlFree(handler);
				2065	}
				2066	#endif /* LIBXML_ICONV_ENABLED */
				2067	#ifdef DEBUG_ENCODING
				2068	if (ret)
				2069	xmlGenericError(xmlGenericErrorContext,
				2070	"failed to close the encoding handler\n");
				2071	else
				2072	xmlGenericError(xmlGenericErrorContext,
				2073	"closed the encoding handler\n");
				2074
				2075	#endif
				2076	return(ret);
				2077	}
				2078