Blame - encoding.c - platform/external/libxml2

blob: 781d8dab6a91f71fc947a0240c7b94a0286b1a93 [file] [log] [blame]

Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1	/*
				2	* encoding.c : implements the encoding conversion functions needed for XML
				3	*
				4	* Related specs:
				5	* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
				6	* rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
				7	* [ISO-10646] UTF-8 and UTF-16 in Annexes
				8	* [ISO-8859-1] ISO Latin-1 characters codes.
				9	* [UNICODE] The Unicode Consortium, "The Unicode Standard --
				10	* Worldwide Character Encoding -- Version 1.0", Addison-
				11	* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
				12	* described in Unicode Technical Report #4.
				13	* [US-ASCII] Coded Character Set--7-bit American Standard Code for
				14	* Information Interchange, ANSI X3.4-1986.
				15	*
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	16	* See Copyright for the status of this software.
				17	*
Daniel Veillard	c5d6434	2001-06-24 12:13:24 +0000	[diff] [blame]	18	* daniel@veillard.com
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	19	*
				20	* UTF8 string routines from:
				21	* "William M. Brack" <wbrack@mmm.com.hk>
				22	*
				23	* Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	24	*/
				25
Bjorn Reese	70a9da5	2001-04-21 16:57:29 +0000	[diff] [blame]	26	#include "libxml.h"
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	27
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	28	#include <string.h>
				29
				30	#ifdef HAVE_CTYPE_H
				31	#include <ctype.h>
				32	#endif
				33	#ifdef HAVE_STDLIB_H
				34	#include <stdlib.h>
				35	#endif
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	36	#ifdef LIBXML_ICONV_ENABLED
				37	#ifdef HAVE_ERRNO_H
				38	#include <errno.h>
				39	#endif
				40	#endif
				41	#include <libxml/encoding.h>
				42	#include <libxml/xmlmemory.h>
				43	#ifdef LIBXML_HTML_ENABLED
				44	#include <libxml/HTMLparser.h>
				45	#endif
				46	#include <libxml/xmlerror.h>
				47
Daniel Veillard	2209073	2001-07-16 00:06:07 +0000	[diff] [blame]	48	static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
				49	static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	50
				51	typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
				52	typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
				53	struct _xmlCharEncodingAlias {
				54	const char *name;
				55	const char *alias;
				56	};
				57
				58	static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
				59	static int xmlCharEncodingAliasesNb = 0;
				60	static int xmlCharEncodingAliasesMax = 0;
				61
				62	#ifdef LIBXML_ICONV_ENABLED
				63	#if 0
				64	#define DEBUG_ENCODING /* Define this to get encoding traces */
				65	#endif
				66	#endif
				67
				68	static int xmlLittleEndian = 1;
				69
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	70	/************************************************************************
				71	* *
				72	* Generic UTF8 handling routines *
				73	* *
				74	* From rfc2044: encoding of the Unicode values on UTF-8: *
				75	* *
				76	* UCS-4 range (hex.) UTF-8 octet sequence (binary) *
				77	* 0000 0000-0000 007F 0xxxxxxx *
				78	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
				79	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
				80	* *
				81	* I hope we won't use values > 0xFFFF anytime soon ! *
				82	* *
				83	************************************************************************/
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	84
				85	/**
Daniel Veillard	e043ee1	2001-04-16 14:08:07 +0000	[diff] [blame]	86	* xmlUTF8Strlen:
				87	* @utf: a sequence of UTF-8 encoded bytes
				88	*
Daniel Veillard	60087f3	2001-10-10 09:45:09 +0000	[diff] [blame^]	89	* compute the length of an UTF8 string, it doesn't do a full UTF8
Daniel Veillard	e043ee1	2001-04-16 14:08:07 +0000	[diff] [blame]	90	* checking of the content of the string.
				91	*
				92	* Returns the number of characters in the string or -1 in case of error
				93	*/
				94	int
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	95	xmlUTF8Strlen(const xmlChar *utf) {
Daniel Veillard	e043ee1	2001-04-16 14:08:07 +0000	[diff] [blame]	96	int ret = 0;
				97
				98	if (utf == NULL)
				99	return(-1);
				100
				101	while (*utf != 0) {
				102	if (utf[0] & 0x80) {
				103	if ((utf[1] & 0xc0) != 0x80)
				104	return(-1);
				105	if ((utf[0] & 0xe0) == 0xe0) {
				106	if ((utf[2] & 0xc0) != 0x80)
				107	return(-1);
				108	if ((utf[0] & 0xf0) == 0xf0) {
				109	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				110	return(-1);
				111	utf += 4;
				112	} else {
				113	utf += 3;
				114	}
				115	} else {
				116	utf += 2;
				117	}
				118	} else {
				119	utf++;
				120	}
				121	ret++;
				122	}
				123	return(ret);
				124	}
				125
				126	/**
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	127	* xmlGetUTF8Char:
				128	* @utf: a sequence of UTF-8 encoded bytes
				129	* @len: a pointer to @bytes len
				130	*
				131	* Read one UTF8 Char from @utf
				132	*
				133	* Returns the char value or -1 in case of error and update @len with the
				134	* number of bytes used
				135	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	136	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	137	xmlGetUTF8Char(const unsigned char utf, int len) {
				138	unsigned int c;
				139
				140	if (utf == NULL)
				141	goto error;
				142	if (len == NULL)
				143	goto error;
				144	if (*len < 1)
				145	goto error;
				146
				147	c = utf[0];
				148	if (c & 0x80) {
				149	if (*len < 2)
				150	goto error;
				151	if ((utf[1] & 0xc0) != 0x80)
				152	goto error;
				153	if ((c & 0xe0) == 0xe0) {
				154	if (*len < 3)
				155	goto error;
				156	if ((utf[2] & 0xc0) != 0x80)
				157	goto error;
				158	if ((c & 0xf0) == 0xf0) {
				159	if (*len < 4)
				160	goto error;
				161	if ((c & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
				162	goto error;
				163	*len = 4;
				164	/* 4-byte code */
				165	c = (utf[0] & 0x7) << 18;
				166	c \|= (utf[1] & 0x3f) << 12;
				167	c \|= (utf[2] & 0x3f) << 6;
				168	c \|= utf[3] & 0x3f;
				169	} else {
				170	/* 3-byte code */
				171	*len = 3;
				172	c = (utf[0] & 0xf) << 12;
				173	c \|= (utf[1] & 0x3f) << 6;
				174	c \|= utf[2] & 0x3f;
				175	}
				176	} else {
				177	/* 2-byte code */
				178	*len = 2;
				179	c = (utf[0] & 0x1f) << 6;
				180	c \|= utf[1] & 0x3f;
				181	}
				182	} else {
				183	/* 1-byte code */
				184	*len = 1;
				185	}
				186	return(c);
				187
				188	error:
				189	*len = 0;
				190	return(-1);
				191	}
				192
				193	/**
				194	* xmlCheckUTF8: Check utf-8 string for legality.
				195	* @utf: Pointer to putative utf-8 encoded string.
				196	*
				197	* Checks @utf for being valid utf-8. @utf is assumed to be
				198	* null-terminated. This function is not super-strict, as it will
				199	* allow longer utf-8 sequences than necessary. Note that Java is
				200	* capable of producing these sequences if provoked. Also note, this
				201	* routine checks for the 4-byte maxiumum size, but does not check for
				202	* 0x10ffff maximum value.
				203	*
				204	* Return value: true if @utf is valid.
				205	**/
				206	int
				207	xmlCheckUTF8(const unsigned char *utf)
				208	{
				209	int ix;
				210	unsigned char c;
				211
				212	for (ix = 0; (c = utf[ix]);) {
				213	if (c & 0x80) {
				214	if ((utf[ix + 1] & 0xc0) != 0x80)
				215	return(0);
				216	if ((c & 0xe0) == 0xe0) {
				217	if ((utf[ix + 2] & 0xc0) != 0x80)
				218	return(0);
				219	if ((c & 0xf0) == 0xf0) {
				220	if ((c & 0xf8) != 0xf0 \|\| (utf[ix + 3] & 0xc0) != 0x80)
				221	return(0);
				222	ix += 4;
				223	/* 4-byte code */
				224	} else
				225	/* 3-byte code */
				226	ix += 3;
				227	} else
				228	/* 2-byte code */
				229	ix += 2;
				230	} else
				231	/* 1-byte code */
				232	ix++;
				233	}
				234	return(1);
				235	}
				236
				237	/**
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	238	* xmlUTF8Strsize:
				239	* @utf: a sequence of UTF-8 encoded bytes
				240	* @len: the number of characters in the array
				241	*
				242	* storage size of an UTF8 string
				243	*
				244	* Returns the storage size of
				245	* the first 'len' characters of ARRAY
				246	*
				247	*/
				248
				249	int
				250	xmlUTF8Strsize(const xmlChar *utf, int len) {
				251	const xmlChar *ptr=utf;
				252	xmlChar ch;
				253
				254	if (len <= 0)
				255	return(0);
				256
				257	while ( len-- > 0) {
				258	if ( !*ptr )
				259	break;
				260	if ( (ch = *ptr++) & 0x80)
				261	while ( (ch<<=1) & 0x80 )
				262	ptr++;
				263	}
				264	return (ptr - utf);
				265	}
				266
				267
				268	/**
				269	* xmlUTF8Strndup:
				270	* @utf: the input UTF8 *
				271	* @len: the len of @utf (in chars)
				272	*
				273	* a strndup for array of UTF8's
				274	*
				275	* Returns a new UTF8 * or NULL
				276	*/
				277	xmlChar *
				278	xmlUTF8Strndup(const xmlChar *utf, int len) {
				279	xmlChar *ret;
				280	int i;
				281
				282	if ((utf == NULL) \|\| (len < 0)) return(NULL);
				283	i = xmlUTF8Strsize(utf, len);
				284	ret = (xmlChar ) xmlMalloc((i + 1) sizeof(xmlChar));
				285	if (ret == NULL) {
				286	xmlGenericError(xmlGenericErrorContext,
				287	"malloc of %ld byte failed\n",
				288	(len + 1) * (long)sizeof(xmlChar));
				289	return(NULL);
				290	}
				291	memcpy(ret, utf, i * sizeof(xmlChar));
				292	ret[i] = 0;
				293	return(ret);
				294	}
				295
				296	/**
				297	* xmlUTF8Strpos:
				298	* @utf: the input UTF8 *
				299	* @pos: the position of the desired UTF8 char (in chars)
				300	*
				301	* a function to provide the equivalent of fetching a
				302	* character from a string array
				303	*
				304	* Returns a pointer to the UTF8 character or NULL
				305	*/
				306	xmlChar *
				307	xmlUTF8Strpos(const xmlChar *utf, int pos) {
				308	xmlChar ch;
				309
				310	if (utf == NULL) return(NULL);
				311	if ( (pos < 0) \|\| (pos >= xmlUTF8Strlen(utf)) )
				312	return(NULL);
				313	while (pos--) {
				314	if ((ch=*utf++) == 0) return(NULL);
				315	if ( ch & 0x80 ) {
				316	/* if not simple ascii, verify proper format */
				317	if ( (ch & 0xc0) != 0xc0 )
				318	return(NULL);
				319	/* then skip over remaining bytes for this char */
				320	while ( (ch <<= 1) & 0x80 )
				321	if ( (*utf++ & 0xc0) != 0x80 )
				322	return(NULL);
				323	}
				324	}
				325	return((xmlChar *)utf);
				326	}
				327
				328	/**
				329	* xmlUTF8Strloc:
				330	* @utf: the input UTF8 *
				331	* @utfchar: the UTF8 character to be found
				332	*
				333	* a function to provide relative location of a UTF8 char
				334	*
				335	* Returns the relative character position of the desired char
				336	* or -1 if not found
				337	*/
				338	int
				339	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
				340	int i, size;
				341	xmlChar ch;
				342
				343	if (utf==NULL \|\| utfchar==NULL) return -1;
				344	size = xmlUTF8Strsize(utfchar, 1);
				345	for(i=0; (ch=*utf) != 0; i++) {
				346	if (xmlStrncmp(utf, utfchar, size)==0)
				347	return(i);
				348	utf++;
				349	if ( ch & 0x80 ) {
				350	/* if not simple ascii, verify proper format */
				351	if ( (ch & 0xc0) != 0xc0 )
				352	return(-1);
				353	/* then skip over remaining bytes for this char */
				354	while ( (ch <<= 1) & 0x80 )
				355	if ( (*utf++ & 0xc0) != 0x80 )
				356	return(-1);
				357	}
				358	}
				359
				360	return(-1);
				361	}
				362	/**
				363	* xmlUTF8Strsub:
				364	* @utf: a sequence of UTF-8 encoded bytes
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	365	* @start: relative pos of first char
				366	* @len: total number to copy
				367	*
				368	* Note: positions are given in units of UTF-8 chars
				369	*
				370	* Returns a pointer to a newly created string
				371	* or NULL if any problem
				372	*/
				373
				374	xmlChar *
				375	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
				376	int i;
				377	xmlChar ch;
				378
				379	if (utf == NULL) return(NULL);
				380	if (start < 0) return(NULL);
				381	if (len < 0) return(NULL);
				382
				383	/*
				384	* Skip over any leading chars
				385	*/
				386	for (i = 0;i < start;i++) {
				387	if ((ch=*utf++) == 0) return(NULL);
				388	if ( ch & 0x80 ) {
				389	/* if not simple ascii, verify proper format */
				390	if ( (ch & 0xc0) != 0xc0 )
				391	return(NULL);
				392	/* then skip over remaining bytes for this char */
				393	while ( (ch <<= 1) & 0x80 )
				394	if ( (*utf++ & 0xc0) != 0x80 )
				395	return(NULL);
				396	}
				397	}
				398
				399	return(xmlUTF8Strndup(utf, len));
				400	}
				401
				402	/************************************************************************
				403	* *
				404	* Conversions To/From UTF8 encoding *
				405	* *
				406	************************************************************************/
				407
				408	/**
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	409	* asciiToUTF8:
				410	* @out: a pointer to an array of bytes to store the result
				411	* @outlen: the length of @out
				412	* @in: a pointer to an array of ASCII chars
				413	* @inlen: the length of @in
				414	*
				415	* Take a block of ASCII chars in and try to convert it to an UTF-8
				416	* block of chars out.
				417	* Returns 0 if success, or -1 otherwise
				418	* The value of @inlen after return is the number of octets consumed
				419	* as the return value is positive, else unpredictiable.
				420	* The value of @outlen after return is the number of ocetes consumed.
				421	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	422	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	423	asciiToUTF8(unsigned char* out, int *outlen,
				424	const unsigned char* in, int *inlen) {
				425	unsigned char* outstart = out;
				426	const unsigned char* base = in;
				427	const unsigned char* processed = in;
				428	unsigned char* outend = out + *outlen;
				429	const unsigned char* inend;
				430	unsigned int c;
				431	int bits;
				432
				433	inend = in + (*inlen);
				434	while ((in < inend) && (out - outstart + 5 < *outlen)) {
				435	c= *in++;
				436
				437	/* assertion: c is a single UTF-4 value */
				438	if (out >= outend)
				439	break;
				440	if (c < 0x80) { *out++= c; bits= -6; }
				441	else {
				442	*outlen = out - outstart;
				443	*inlen = processed - base;
				444	return(-1);
				445	}
				446
				447	for ( ; bits >= 0; bits-= 6) {
				448	if (out >= outend)
				449	break;
				450	*out++= ((c >> bits) & 0x3F) \| 0x80;
				451	}
				452	processed = (const unsigned char*) in;
				453	}
				454	*outlen = out - outstart;
				455	*inlen = processed - base;
				456	return(0);
				457	}
				458
				459	/**
				460	* UTF8Toascii:
				461	* @out: a pointer to an array of bytes to store the result
				462	* @outlen: the length of @out
				463	* @in: a pointer to an array of UTF-8 chars
				464	* @inlen: the length of @in
				465	*
				466	* Take a block of UTF-8 chars in and try to convert it to an ASCII
				467	* block of chars out.
				468	*
				469	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				470	* The value of @inlen after return is the number of octets consumed
				471	* as the return value is positive, else unpredictiable.
				472	* The value of @outlen after return is the number of ocetes consumed.
				473	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	474	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	475	UTF8Toascii(unsigned char* out, int *outlen,
				476	const unsigned char* in, int *inlen) {
				477	const unsigned char* processed = in;
				478	const unsigned char* outend;
				479	const unsigned char* outstart = out;
				480	const unsigned char* instart = in;
				481	const unsigned char* inend;
				482	unsigned int c, d;
				483	int trailing;
				484
				485	if (in == NULL) {
				486	/*
				487	* initialization nothing to do
				488	*/
				489	*outlen = 0;
				490	*inlen = 0;
				491	return(0);
				492	}
				493	inend = in + (*inlen);
				494	outend = out + (*outlen);
				495	while (in < inend) {
				496	d = *in++;
				497	if (d < 0x80) { c= d; trailing= 0; }
				498	else if (d < 0xC0) {
				499	/* trailing byte in leading position */
				500	*outlen = out - outstart;
				501	*inlen = processed - instart;
				502	return(-2);
				503	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				504	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				505	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				506	else {
				507	/* no chance for this in Ascii */
				508	*outlen = out - outstart;
				509	*inlen = processed - instart;
				510	return(-2);
				511	}
				512
				513	if (inend - in < trailing) {
				514	break;
				515	}
				516
				517	for ( ; trailing; trailing--) {
				518	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
				519	break;
				520	c <<= 6;
				521	c \|= d & 0x3F;
				522	}
				523
				524	/* assertion: c is a single UTF-4 value */
				525	if (c < 0x80) {
				526	if (out >= outend)
				527	break;
				528	*out++ = c;
				529	} else {
				530	/* no chance for this in Ascii */
				531	*outlen = out - outstart;
				532	*inlen = processed - instart;
				533	return(-2);
				534	}
				535	processed = in;
				536	}
				537	*outlen = out - outstart;
				538	*inlen = processed - instart;
				539	return(0);
				540	}
				541
				542	/**
				543	* isolat1ToUTF8:
				544	* @out: a pointer to an array of bytes to store the result
				545	* @outlen: the length of @out
				546	* @in: a pointer to an array of ISO Latin 1 chars
				547	* @inlen: the length of @in
				548	*
				549	* Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
				550	* block of chars out.
				551	* Returns 0 if success, or -1 otherwise
				552	* The value of @inlen after return is the number of octets consumed
				553	* as the return value is positive, else unpredictiable.
				554	* The value of @outlen after return is the number of ocetes consumed.
				555	*/
				556	int
				557	isolat1ToUTF8(unsigned char* out, int *outlen,
				558	const unsigned char* in, int *inlen) {
				559	unsigned char* outstart = out;
				560	const unsigned char* base = in;
				561	const unsigned char* processed = in;
				562	unsigned char* outend = out + *outlen;
				563	const unsigned char* inend;
				564	unsigned int c;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	565
				566	inend = in + (*inlen);
Daniel Veillard	02141ea	2001-04-30 11:46:40 +0000	[diff] [blame]	567	while (in < inend) {
				568	c = *in++;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	569
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	570	if (out >= outend)
				571	break;
Daniel Veillard	02141ea	2001-04-30 11:46:40 +0000	[diff] [blame]	572
				573	if (c < 0x80) {
				574	*out++ = c;
				575	processed++;
				576	continue;
				577	} else {
				578	*out++= ((c >> 6) & 0x1F) \| 0xC0;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	579	if (out >= outend)
Daniel Veillard	02141ea	2001-04-30 11:46:40 +0000	[diff] [blame]	580	break;
				581	*out++= (c & 0x3F) \| 0x80;
				582	processed++;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	583	}
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	584	}
				585	*outlen = out - outstart;
				586	*inlen = processed - base;
				587	return(0);
				588	}
				589
				590	/**
				591	* UTF8Toisolat1:
				592	* @out: a pointer to an array of bytes to store the result
				593	* @outlen: the length of @out
				594	* @in: a pointer to an array of UTF-8 chars
				595	* @inlen: the length of @in
				596	*
				597	* Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
				598	* block of chars out.
				599	*
				600	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
				601	* The value of @inlen after return is the number of octets consumed
				602	* as the return value is positive, else unpredictiable.
				603	* The value of @outlen after return is the number of ocetes consumed.
				604	*/
				605	int
				606	UTF8Toisolat1(unsigned char* out, int *outlen,
				607	const unsigned char* in, int *inlen) {
				608	const unsigned char* processed = in;
				609	const unsigned char* outend;
				610	const unsigned char* outstart = out;
				611	const unsigned char* instart = in;
				612	const unsigned char* inend;
				613	unsigned int c, d;
				614	int trailing;
				615
				616	if (in == NULL) {
				617	/*
				618	* initialization nothing to do
				619	*/
				620	*outlen = 0;
				621	*inlen = 0;
				622	return(0);
				623	}
				624	inend = in + (*inlen);
				625	outend = out + (*outlen);
				626	while (in < inend) {
				627	d = *in++;
				628	if (d < 0x80) { c= d; trailing= 0; }
				629	else if (d < 0xC0) {
				630	/* trailing byte in leading position */
				631	*outlen = out - outstart;
				632	*inlen = processed - instart;
				633	return(-2);
				634	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				635	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				636	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				637	else {
				638	/* no chance for this in IsoLat1 */
				639	*outlen = out - outstart;
				640	*inlen = processed - instart;
				641	return(-2);
				642	}
				643
				644	if (inend - in < trailing) {
				645	break;
				646	}
				647
				648	for ( ; trailing; trailing--) {
				649	if (in >= inend)
				650	break;
				651	if (((d= *in++) & 0xC0) != 0x80) {
				652	*outlen = out - outstart;
				653	*inlen = processed - instart;
				654	return(-2);
				655	}
				656	c <<= 6;
				657	c \|= d & 0x3F;
				658	}
				659
				660	/* assertion: c is a single UTF-4 value */
				661	if (c <= 0xFF) {
				662	if (out >= outend)
				663	break;
				664	*out++ = c;
				665	} else {
				666	/* no chance for this in IsoLat1 */
				667	*outlen = out - outstart;
				668	*inlen = processed - instart;
				669	return(-2);
				670	}
				671	processed = in;
				672	}
				673	*outlen = out - outstart;
				674	*inlen = processed - instart;
				675	return(0);
				676	}
				677
				678	/**
				679	* UTF16LEToUTF8:
				680	* @out: a pointer to an array of bytes to store the result
				681	* @outlen: the length of @out
				682	* @inb: a pointer to an array of UTF-16LE passwd as a byte array
				683	* @inlenb: the length of @in in UTF-16LE chars
				684	*
				685	* Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
				686	* block of chars out. This function assume the endian properity
				687	* is the same between the native type of this machine and the
				688	* inputed one.
				689	*
				690	* Returns the number of byte written, or -1 by lack of space, or -2
				691	* if the transcoding fails (for *in is not valid utf16 string)
				692	* The value of *inlen after return is the number of octets consumed
				693	* as the return value is positive, else unpredictiable.
				694	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	695	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	696	UTF16LEToUTF8(unsigned char* out, int *outlen,
				697	const unsigned char* inb, int *inlenb)
				698	{
				699	unsigned char* outstart = out;
				700	const unsigned char* processed = inb;
				701	unsigned char* outend = out + *outlen;
				702	unsigned short* in = (unsigned short*) inb;
				703	unsigned short* inend;
				704	unsigned int c, d, inlen;
				705	unsigned char *tmp;
				706	int bits;
				707
				708	if ((*inlenb % 2) == 1)
				709	(*inlenb)--;
				710	inlen = *inlenb / 2;
				711	inend = in + inlen;
				712	while ((in < inend) && (out - outstart + 5 < *outlen)) {
				713	if (xmlLittleEndian) {
				714	c= *in++;
				715	} else {
				716	tmp = (unsigned char *) in;
				717	c = *tmp++;
				718	c = c \| (((unsigned int)*tmp) << 8);
				719	in++;
				720	}
				721	if ((c & 0xFC00) == 0xD800) { /* surrogates */
				722	if (in >= inend) { /* (in > inend) shouldn't happens */
				723	break;
				724	}
				725	if (xmlLittleEndian) {
				726	d = *in++;
				727	} else {
				728	tmp = (unsigned char *) in;
				729	d = *tmp++;
				730	d = d \| (((unsigned int)*tmp) << 8);
				731	in++;
				732	}
				733	if ((d & 0xFC00) == 0xDC00) {
				734	c &= 0x03FF;
				735	c <<= 10;
				736	c \|= d & 0x03FF;
				737	c += 0x10000;
				738	}
				739	else {
				740	*outlen = out - outstart;
				741	*inlenb = processed - inb;
				742	return(-2);
				743	}
				744	}
				745
				746	/* assertion: c is a single UTF-4 value */
				747	if (out >= outend)
				748	break;
				749	if (c < 0x80) { *out++= c; bits= -6; }
				750	else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				751	else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				752	else { *out++= ((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				753
				754	for ( ; bits >= 0; bits-= 6) {
				755	if (out >= outend)
				756	break;
				757	*out++= ((c >> bits) & 0x3F) \| 0x80;
				758	}
				759	processed = (const unsigned char*) in;
				760	}
				761	*outlen = out - outstart;
				762	*inlenb = processed - inb;
				763	return(0);
				764	}
				765
				766	/**
				767	* UTF8ToUTF16LE:
				768	* @outb: a pointer to an array of bytes to store the result
				769	* @outlen: the length of @outb
				770	* @in: a pointer to an array of UTF-8 chars
				771	* @inlen: the length of @in
				772	*
				773	* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
				774	* block of chars out.
				775	*
				776	* Returns the number of byte written, or -1 by lack of space, or -2
				777	* if the transcoding failed.
				778	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	779	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	780	UTF8ToUTF16LE(unsigned char* outb, int *outlen,
				781	const unsigned char* in, int *inlen)
				782	{
				783	unsigned short* out = (unsigned short*) outb;
				784	const unsigned char* processed = in;
				785	unsigned short* outstart= out;
				786	unsigned short* outend;
				787	const unsigned char* inend= in+*inlen;
				788	unsigned int c, d;
				789	int trailing;
				790	unsigned char *tmp;
				791	unsigned short tmp1, tmp2;
				792
				793	if (in == NULL) {
				794	/*
				795	* initialization, add the Byte Order Mark
				796	*/
				797	if (*outlen >= 2) {
				798	outb[0] = 0xFF;
				799	outb[1] = 0xFE;
				800	*outlen = 2;
				801	*inlen = 0;
				802	#ifdef DEBUG_ENCODING
				803	xmlGenericError(xmlGenericErrorContext,
				804	"Added FFFE Byte Order Mark\n");
				805	#endif
				806	return(2);
				807	}
				808	*outlen = 0;
				809	*inlen = 0;
				810	return(0);
				811	}
				812	outend = out + (*outlen / 2);
				813	while (in < inend) {
				814	d= *in++;
				815	if (d < 0x80) { c= d; trailing= 0; }
				816	else if (d < 0xC0) {
				817	/* trailing byte in leading position */
				818	outlen = (out - outstart) 2;
				819	*inlen = processed - in;
				820	return(-2);
				821	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				822	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				823	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				824	else {
				825	/* no chance for this in UTF-16 */
				826	outlen = (out - outstart) 2;
				827	*inlen = processed - in;
				828	return(-2);
				829	}
				830
				831	if (inend - in < trailing) {
				832	break;
				833	}
				834
				835	for ( ; trailing; trailing--) {
				836	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
				837	break;
				838	c <<= 6;
				839	c \|= d & 0x3F;
				840	}
				841
				842	/* assertion: c is a single UTF-4 value */
				843	if (c < 0x10000) {
				844	if (out >= outend)
				845	break;
				846	if (xmlLittleEndian) {
				847	*out++ = c;
				848	} else {
				849	tmp = (unsigned char *) out;
				850	*tmp = c ;
				851	*(tmp + 1) = c >> 8 ;
				852	out++;
				853	}
				854	}
				855	else if (c < 0x110000) {
				856	if (out+1 >= outend)
				857	break;
				858	c -= 0x10000;
				859	if (xmlLittleEndian) {
				860	*out++ = 0xD800 \| (c >> 10);
				861	*out++ = 0xDC00 \| (c & 0x03FF);
				862	} else {
				863	tmp1 = 0xD800 \| (c >> 10);
				864	tmp = (unsigned char *) out;
				865	*tmp = (unsigned char) tmp1;
				866	*(tmp + 1) = tmp1 >> 8;
				867	out++;
				868
				869	tmp2 = 0xDC00 \| (c & 0x03FF);
				870	tmp = (unsigned char *) out;
				871	*tmp = (unsigned char) tmp2;
				872	*(tmp + 1) = tmp2 >> 8;
				873	out++;
				874	}
				875	}
				876	else
				877	break;
				878	processed = in;
				879	}
				880	outlen = (out - outstart) 2;
				881	*inlen = processed - in;
				882	return(0);
				883	}
				884
				885	/**
				886	* UTF16BEToUTF8:
				887	* @out: a pointer to an array of bytes to store the result
				888	* @outlen: the length of @out
				889	* @inb: a pointer to an array of UTF-16 passwd as a byte array
				890	* @inlenb: the length of @in in UTF-16 chars
				891	*
				892	* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
				893	* block of chars out. This function assume the endian properity
				894	* is the same between the native type of this machine and the
				895	* inputed one.
				896	*
				897	* Returns the number of byte written, or -1 by lack of space, or -2
				898	* if the transcoding fails (for *in is not valid utf16 string)
				899	* The value of *inlen after return is the number of octets consumed
				900	* as the return value is positive, else unpredictiable.
				901	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	902	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	903	UTF16BEToUTF8(unsigned char* out, int *outlen,
				904	const unsigned char* inb, int *inlenb)
				905	{
				906	unsigned char* outstart = out;
				907	const unsigned char* processed = inb;
				908	unsigned char* outend = out + *outlen;
				909	unsigned short* in = (unsigned short*) inb;
				910	unsigned short* inend;
				911	unsigned int c, d, inlen;
				912	unsigned char *tmp;
				913	int bits;
				914
				915	if ((*inlenb % 2) == 1)
				916	(*inlenb)--;
				917	inlen = *inlenb / 2;
				918	inend= in + inlen;
				919	while (in < inend) {
				920	if (xmlLittleEndian) {
				921	tmp = (unsigned char *) in;
				922	c = *tmp++;
				923	c = c << 8;
				924	c = c \| (unsigned int) *tmp;
				925	in++;
				926	} else {
				927	c= *in++;
				928	}
				929	if ((c & 0xFC00) == 0xD800) { /* surrogates */
				930	if (in >= inend) { /* (in > inend) shouldn't happens */
				931	*outlen = out - outstart;
				932	*inlenb = processed - inb;
				933	return(-2);
				934	}
				935	if (xmlLittleEndian) {
				936	tmp = (unsigned char *) in;
				937	d = *tmp++;
				938	d = d << 8;
				939	d = d \| (unsigned int) *tmp;
				940	in++;
				941	} else {
				942	d= *in++;
				943	}
				944	if ((d & 0xFC00) == 0xDC00) {
				945	c &= 0x03FF;
				946	c <<= 10;
				947	c \|= d & 0x03FF;
				948	c += 0x10000;
				949	}
				950	else {
				951	*outlen = out - outstart;
				952	*inlenb = processed - inb;
				953	return(-2);
				954	}
				955	}
				956
				957	/* assertion: c is a single UTF-4 value */
				958	if (out >= outend)
				959	break;
				960	if (c < 0x80) { *out++= c; bits= -6; }
				961	else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
				962	else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
				963	else { *out++= ((c >> 18) & 0x07) \| 0xF0; bits= 12; }
				964
				965	for ( ; bits >= 0; bits-= 6) {
				966	if (out >= outend)
				967	break;
				968	*out++= ((c >> bits) & 0x3F) \| 0x80;
				969	}
				970	processed = (const unsigned char*) in;
				971	}
				972	*outlen = out - outstart;
				973	*inlenb = processed - inb;
				974	return(0);
				975	}
				976
				977	/**
				978	* UTF8ToUTF16BE:
				979	* @outb: a pointer to an array of bytes to store the result
				980	* @outlen: the length of @outb
				981	* @in: a pointer to an array of UTF-8 chars
				982	* @inlen: the length of @in
				983	*
				984	* Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
				985	* block of chars out.
				986	*
				987	* Returns the number of byte written, or -1 by lack of space, or -2
				988	* if the transcoding failed.
				989	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	990	static int
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	991	UTF8ToUTF16BE(unsigned char* outb, int *outlen,
				992	const unsigned char* in, int *inlen)
				993	{
				994	unsigned short* out = (unsigned short*) outb;
				995	const unsigned char* processed = in;
				996	unsigned short* outstart= out;
				997	unsigned short* outend;
				998	const unsigned char* inend= in+*inlen;
				999	unsigned int c, d;
				1000	int trailing;
				1001	unsigned char *tmp;
				1002	unsigned short tmp1, tmp2;
				1003
				1004	if (in == NULL) {
				1005	/*
				1006	* initialization, add the Byte Order Mark
				1007	*/
				1008	if (*outlen >= 2) {
				1009	outb[0] = 0xFE;
				1010	outb[1] = 0xFF;
				1011	*outlen = 2;
				1012	*inlen = 0;
				1013	#ifdef DEBUG_ENCODING
				1014	xmlGenericError(xmlGenericErrorContext,
				1015	"Added FEFF Byte Order Mark\n");
				1016	#endif
				1017	return(2);
				1018	}
				1019	*outlen = 0;
				1020	*inlen = 0;
				1021	return(0);
				1022	}
				1023	outend = out + (*outlen / 2);
				1024	while (in < inend) {
				1025	d= *in++;
				1026	if (d < 0x80) { c= d; trailing= 0; }
				1027	else if (d < 0xC0) {
				1028	/* trailing byte in leading position */
				1029	*outlen = out - outstart;
				1030	*inlen = processed - in;
				1031	return(-2);
				1032	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
				1033	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
				1034	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
				1035	else {
				1036	/* no chance for this in UTF-16 */
				1037	*outlen = out - outstart;
				1038	*inlen = processed - in;
				1039	return(-2);
				1040	}
				1041
				1042	if (inend - in < trailing) {
				1043	break;
				1044	}
				1045
				1046	for ( ; trailing; trailing--) {
				1047	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80)) break;
				1048	c <<= 6;
				1049	c \|= d & 0x3F;
				1050	}
				1051
				1052	/* assertion: c is a single UTF-4 value */
				1053	if (c < 0x10000) {
				1054	if (out >= outend) break;
				1055	if (xmlLittleEndian) {
				1056	tmp = (unsigned char *) out;
				1057	*tmp = c >> 8;
				1058	*(tmp + 1) = c;
				1059	out++;
				1060	} else {
				1061	*out++ = c;
				1062	}
				1063	}
				1064	else if (c < 0x110000) {
				1065	if (out+1 >= outend) break;
				1066	c -= 0x10000;
				1067	if (xmlLittleEndian) {
				1068	tmp1 = 0xD800 \| (c >> 10);
				1069	tmp = (unsigned char *) out;
				1070	*tmp = tmp1 >> 8;
				1071	*(tmp + 1) = (unsigned char) tmp1;
				1072	out++;
				1073
				1074	tmp2 = 0xDC00 \| (c & 0x03FF);
				1075	tmp = (unsigned char *) out;
				1076	*tmp = tmp2 >> 8;
				1077	*(tmp + 1) = (unsigned char) tmp2;
				1078	out++;
				1079	} else {
				1080	*out++ = 0xD800 \| (c >> 10);
				1081	*out++ = 0xDC00 \| (c & 0x03FF);
				1082	}
				1083	}
				1084	else
				1085	break;
				1086	processed = in;
				1087	}
				1088	outlen = (out - outstart) 2;
				1089	*inlen = processed - in;
				1090	return(0);
				1091	}
				1092
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	1093	/************************************************************************
				1094	* *
				1095	* Generic encoding handling routines *
				1096	* *
				1097	************************************************************************/
				1098
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1099	/**
				1100	* xmlDetectCharEncoding:
				1101	* @in: a pointer to the first bytes of the XML entity, must be at least
				1102	* 4 bytes long.
				1103	* @len: pointer to the length of the buffer
				1104	*
				1105	* Guess the encoding of the entity using the first bytes of the entity content
				1106	* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
				1107	*
				1108	* Returns one of the XML_CHAR_ENCODING_... values.
				1109	*/
				1110	xmlCharEncoding
				1111	xmlDetectCharEncoding(const unsigned char* in, int len)
				1112	{
				1113	if (len >= 4) {
				1114	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				1115	(in[2] == 0x00) && (in[3] == 0x3C))
				1116	return(XML_CHAR_ENCODING_UCS4BE);
				1117	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
				1118	(in[2] == 0x00) && (in[3] == 0x00))
				1119	return(XML_CHAR_ENCODING_UCS4LE);
				1120	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				1121	(in[2] == 0x3C) && (in[3] == 0x00))
				1122	return(XML_CHAR_ENCODING_UCS4_2143);
				1123	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
				1124	(in[2] == 0x00) && (in[3] == 0x00))
				1125	return(XML_CHAR_ENCODING_UCS4_3412);
				1126	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
				1127	(in[2] == 0xA7) && (in[3] == 0x94))
				1128	return(XML_CHAR_ENCODING_EBCDIC);
				1129	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
				1130	(in[2] == 0x78) && (in[3] == 0x6D))
				1131	return(XML_CHAR_ENCODING_UTF8);
				1132	}
Daniel Veillard	87a764e	2001-06-20 17:41:10 +0000	[diff] [blame]	1133	if (len >= 3) {
				1134	/*
				1135	* Errata on XML-1.0 June 20 2001
				1136	* We now allow an UTF8 encoded BOM
				1137	*/
				1138	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
				1139	(in[2] == 0xBF))
				1140	return(XML_CHAR_ENCODING_UTF8);
				1141	}
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1142	if (len >= 2) {
				1143	if ((in[0] == 0xFE) && (in[1] == 0xFF))
				1144	return(XML_CHAR_ENCODING_UTF16BE);
				1145	if ((in[0] == 0xFF) && (in[1] == 0xFE))
				1146	return(XML_CHAR_ENCODING_UTF16LE);
				1147	}
				1148	return(XML_CHAR_ENCODING_NONE);
				1149	}
				1150
				1151	/**
				1152	* xmlCleanupEncodingAliases:
				1153	*
				1154	* Unregisters all aliases
				1155	*/
				1156	void
				1157	xmlCleanupEncodingAliases(void) {
				1158	int i;
				1159
				1160	if (xmlCharEncodingAliases == NULL)
				1161	return;
				1162
				1163	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				1164	if (xmlCharEncodingAliases[i].name != NULL)
				1165	xmlFree((char *) xmlCharEncodingAliases[i].name);
				1166	if (xmlCharEncodingAliases[i].alias != NULL)
				1167	xmlFree((char *) xmlCharEncodingAliases[i].alias);
				1168	}
				1169	xmlCharEncodingAliasesNb = 0;
				1170	xmlCharEncodingAliasesMax = 0;
				1171	xmlFree(xmlCharEncodingAliases);
				1172	}
				1173
				1174	/**
				1175	* xmlGetEncodingAlias:
				1176	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
				1177	*
				1178	* Lookup an encoding name for the given alias.
				1179	*
				1180	* Returns NULL if not found the original name otherwise
				1181	*/
				1182	const char *
				1183	xmlGetEncodingAlias(const char *alias) {
				1184	int i;
				1185	char upper[100];
				1186
				1187	if (alias == NULL)
				1188	return(NULL);
				1189
				1190	if (xmlCharEncodingAliases == NULL)
				1191	return(NULL);
				1192
				1193	for (i = 0;i < 99;i++) {
				1194	upper[i] = toupper(alias[i]);
				1195	if (upper[i] == 0) break;
				1196	}
				1197	upper[i] = 0;
				1198
				1199	/*
				1200	* Walk down the list looking for a definition of the alias
				1201	*/
				1202	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				1203	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
				1204	return(xmlCharEncodingAliases[i].name);
				1205	}
				1206	}
				1207	return(NULL);
				1208	}
				1209
				1210	/**
				1211	* xmlAddEncodingAlias:
				1212	* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
				1213	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
				1214	*
				1215	* Registers and alias @alias for an encoding named @name. Existing alias
				1216	* will be overwritten.
				1217	*
				1218	* Returns 0 in case of success, -1 in case of error
				1219	*/
				1220	int
				1221	xmlAddEncodingAlias(const char name, const char alias) {
				1222	int i;
				1223	char upper[100];
				1224
				1225	if ((name == NULL) \|\| (alias == NULL))
				1226	return(-1);
				1227
				1228	for (i = 0;i < 99;i++) {
				1229	upper[i] = toupper(alias[i]);
				1230	if (upper[i] == 0) break;
				1231	}
				1232	upper[i] = 0;
				1233
				1234	if (xmlCharEncodingAliases == NULL) {
				1235	xmlCharEncodingAliasesNb = 0;
				1236	xmlCharEncodingAliasesMax = 20;
				1237	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
				1238	xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
				1239	if (xmlCharEncodingAliases == NULL)
				1240	return(-1);
				1241	} else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
				1242	xmlCharEncodingAliasesMax *= 2;
				1243	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
				1244	xmlRealloc(xmlCharEncodingAliases,
				1245	xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
				1246	}
				1247	/*
				1248	* Walk down the list looking for a definition of the alias
				1249	*/
				1250	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				1251	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
				1252	/*
				1253	* Replace the definition.
				1254	*/
				1255	xmlFree((char *) xmlCharEncodingAliases[i].name);
				1256	xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
				1257	return(0);
				1258	}
				1259	}
				1260	/*
				1261	* Add the definition
				1262	*/
				1263	xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
				1264	xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
				1265	xmlCharEncodingAliasesNb++;
				1266	return(0);
				1267	}
				1268
				1269	/**
				1270	* xmlDelEncodingAlias:
				1271	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
				1272	*
				1273	* Unregisters an encoding alias @alias
				1274	*
				1275	* Returns 0 in case of success, -1 in case of error
				1276	*/
				1277	int
				1278	xmlDelEncodingAlias(const char *alias) {
				1279	int i;
				1280
				1281	if (alias == NULL)
				1282	return(-1);
				1283
				1284	if (xmlCharEncodingAliases == NULL)
				1285	return(-1);
				1286	/*
				1287	* Walk down the list looking for a definition of the alias
				1288	*/
				1289	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
				1290	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
				1291	xmlFree((char *) xmlCharEncodingAliases[i].name);
				1292	xmlFree((char *) xmlCharEncodingAliases[i].alias);
				1293	xmlCharEncodingAliasesNb--;
				1294	memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
				1295	sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
				1296	return(0);
				1297	}
				1298	}
				1299	return(-1);
				1300	}
				1301
				1302	/**
				1303	* xmlParseCharEncoding:
				1304	* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
				1305	*
				1306	* Conpare the string to the known encoding schemes already known. Note
				1307	* that the comparison is case insensitive accordingly to the section
				1308	* [XML] 4.3.3 Character Encoding in Entities.
				1309	*
				1310	* Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
				1311	* if not recognized.
				1312	*/
				1313	xmlCharEncoding
				1314	xmlParseCharEncoding(const char* name)
				1315	{
				1316	const char *alias;
				1317	char upper[500];
				1318	int i;
				1319
				1320	if (name == NULL)
				1321	return(XML_CHAR_ENCODING_NONE);
				1322
				1323	/*
				1324	* Do the alias resolution
				1325	*/
				1326	alias = xmlGetEncodingAlias(name);
				1327	if (alias != NULL)
				1328	name = alias;
				1329
				1330	for (i = 0;i < 499;i++) {
				1331	upper[i] = toupper(name[i]);
				1332	if (upper[i] == 0) break;
				1333	}
				1334	upper[i] = 0;
				1335
				1336	if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
				1337	if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
				1338	if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
				1339
				1340	/*
				1341	* NOTE: if we were able to parse this, the endianness of UTF16 is
				1342	* already found and in use
				1343	*/
				1344	if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
				1345	if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
				1346
				1347	if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				1348	if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				1349	if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
				1350
				1351	/*
				1352	* NOTE: if we were able to parse this, the endianness of UCS4 is
				1353	* already found and in use
				1354	*/
				1355	if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				1356	if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				1357	if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
				1358
				1359
				1360	if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
				1361	if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
				1362	if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
				1363
				1364	if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
				1365	if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
				1366	if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
				1367
				1368	if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
				1369	if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
				1370	if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
				1371	if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
				1372	if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
				1373	if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
				1374	if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
				1375
				1376	if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
				1377	if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
				1378	if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
				1379
				1380	#ifdef DEBUG_ENCODING
				1381	xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
				1382	#endif
				1383	return(XML_CHAR_ENCODING_ERROR);
				1384	}
				1385
				1386	/**
				1387	* xmlGetCharEncodingName:
				1388	* @enc: the encoding
				1389	*
				1390	* The "canonical" name for XML encoding.
				1391	* C.f. http://www.w3.org/TR/REC-xml#charencoding
				1392	* Section 4.3.3 Character Encoding in Entities
				1393	*
				1394	* Returns the canonical name for the given encoding
				1395	*/
				1396
				1397	const char*
				1398	xmlGetCharEncodingName(xmlCharEncoding enc) {
				1399	switch (enc) {
				1400	case XML_CHAR_ENCODING_ERROR:
				1401	return(NULL);
				1402	case XML_CHAR_ENCODING_NONE:
				1403	return(NULL);
				1404	case XML_CHAR_ENCODING_UTF8:
				1405	return("UTF-8");
				1406	case XML_CHAR_ENCODING_UTF16LE:
				1407	return("UTF-16");
				1408	case XML_CHAR_ENCODING_UTF16BE:
				1409	return("UTF-16");
				1410	case XML_CHAR_ENCODING_EBCDIC:
				1411	return("EBCDIC");
				1412	case XML_CHAR_ENCODING_UCS4LE:
				1413	return("ISO-10646-UCS-4");
				1414	case XML_CHAR_ENCODING_UCS4BE:
				1415	return("ISO-10646-UCS-4");
				1416	case XML_CHAR_ENCODING_UCS4_2143:
				1417	return("ISO-10646-UCS-4");
				1418	case XML_CHAR_ENCODING_UCS4_3412:
				1419	return("ISO-10646-UCS-4");
				1420	case XML_CHAR_ENCODING_UCS2:
				1421	return("ISO-10646-UCS-2");
				1422	case XML_CHAR_ENCODING_8859_1:
				1423	return("ISO-8859-1");
				1424	case XML_CHAR_ENCODING_8859_2:
				1425	return("ISO-8859-2");
				1426	case XML_CHAR_ENCODING_8859_3:
				1427	return("ISO-8859-3");
				1428	case XML_CHAR_ENCODING_8859_4:
				1429	return("ISO-8859-4");
				1430	case XML_CHAR_ENCODING_8859_5:
				1431	return("ISO-8859-5");
				1432	case XML_CHAR_ENCODING_8859_6:
				1433	return("ISO-8859-6");
				1434	case XML_CHAR_ENCODING_8859_7:
				1435	return("ISO-8859-7");
				1436	case XML_CHAR_ENCODING_8859_8:
				1437	return("ISO-8859-8");
				1438	case XML_CHAR_ENCODING_8859_9:
				1439	return("ISO-8859-9");
				1440	case XML_CHAR_ENCODING_2022_JP:
				1441	return("ISO-2022-JP");
				1442	case XML_CHAR_ENCODING_SHIFT_JIS:
				1443	return("Shift-JIS");
				1444	case XML_CHAR_ENCODING_EUC_JP:
				1445	return("EUC-JP");
				1446	case XML_CHAR_ENCODING_ASCII:
				1447	return(NULL);
				1448	}
				1449	return(NULL);
				1450	}
				1451
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	1452	/************************************************************************
				1453	* *
				1454	* Char encoding handlers *
				1455	* *
				1456	************************************************************************/
				1457
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1458
				1459	/* the size should be growable, but it's not a big deal ... */
				1460	#define MAX_ENCODING_HANDLERS 50
				1461	static xmlCharEncodingHandlerPtr *handlers = NULL;
				1462	static int nbCharEncodingHandler = 0;
				1463
				1464	/*
				1465	* The default is UTF-8 for XML, that's also the default used for the
				1466	* parser internals, so the default encoding handler is NULL
				1467	*/
				1468
				1469	static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
				1470
				1471	/**
				1472	* xmlNewCharEncodingHandler:
				1473	* @name: the encoding name, in UTF-8 format (ASCII actually)
				1474	* @input: the xmlCharEncodingInputFunc to read that encoding
				1475	* @output: the xmlCharEncodingOutputFunc to write that encoding
				1476	*
				1477	* Create and registers an xmlCharEncodingHandler.
				1478	* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
				1479	*/
Daniel Veillard	56a4cb8	2001-03-24 17:00:36 +0000	[diff] [blame]	1480	static xmlCharEncodingHandlerPtr
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1481	xmlNewCharEncodingHandler(const char *name,
				1482	xmlCharEncodingInputFunc input,
				1483	xmlCharEncodingOutputFunc output) {
				1484	xmlCharEncodingHandlerPtr handler;
				1485	const char *alias;
				1486	char upper[500];
				1487	int i;
				1488	char *up = 0;
				1489
				1490	/*
				1491	* Do the alias resolution
				1492	*/
				1493	alias = xmlGetEncodingAlias(name);
				1494	if (alias != NULL)
				1495	name = alias;
				1496
				1497	/*
				1498	* Keep only the uppercase version of the encoding.
				1499	*/
				1500	if (name == NULL) {
				1501	xmlGenericError(xmlGenericErrorContext,
				1502	"xmlNewCharEncodingHandler : no name !\n");
				1503	return(NULL);
				1504	}
				1505	for (i = 0;i < 499;i++) {
				1506	upper[i] = toupper(name[i]);
				1507	if (upper[i] == 0) break;
				1508	}
				1509	upper[i] = 0;
				1510	up = xmlMemStrdup(upper);
				1511	if (up == NULL) {
				1512	xmlGenericError(xmlGenericErrorContext,
				1513	"xmlNewCharEncodingHandler : out of memory !\n");
				1514	return(NULL);
				1515	}
				1516
				1517	/*
				1518	* allocate and fill-up an handler block.
				1519	*/
				1520	handler = (xmlCharEncodingHandlerPtr)
				1521	xmlMalloc(sizeof(xmlCharEncodingHandler));
				1522	if (handler == NULL) {
				1523	xmlGenericError(xmlGenericErrorContext,
				1524	"xmlNewCharEncodingHandler : out of memory !\n");
				1525	return(NULL);
				1526	}
				1527	handler->input = input;
				1528	handler->output = output;
				1529	handler->name = up;
				1530
				1531	#ifdef LIBXML_ICONV_ENABLED
				1532	handler->iconv_in = NULL;
				1533	handler->iconv_out = NULL;
				1534	#endif /* LIBXML_ICONV_ENABLED */
				1535
				1536	/*
				1537	* registers and returns the handler.
				1538	*/
				1539	xmlRegisterCharEncodingHandler(handler);
				1540	#ifdef DEBUG_ENCODING
				1541	xmlGenericError(xmlGenericErrorContext,
				1542	"Registered encoding handler for %s\n", name);
				1543	#endif
				1544	return(handler);
				1545	}
				1546
				1547	/**
				1548	* xmlInitCharEncodingHandlers:
				1549	*
				1550	* Initialize the char encoding support, it registers the default
				1551	* encoding supported.
				1552	* NOTE: while public, this function usually doesn't need to be called
				1553	* in normal processing.
				1554	*/
				1555	void
				1556	xmlInitCharEncodingHandlers(void) {
				1557	unsigned short int tst = 0x1234;
				1558	unsigned char ptr = (unsigned char ) &tst;
				1559
				1560	if (handlers != NULL) return;
				1561
				1562	handlers = (xmlCharEncodingHandlerPtr *)
				1563	xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
				1564
				1565	if (*ptr == 0x12) xmlLittleEndian = 0;
				1566	else if (*ptr == 0x34) xmlLittleEndian = 1;
				1567	else xmlGenericError(xmlGenericErrorContext,
				1568	"Odd problem at endianness detection\n");
				1569
				1570	if (handlers == NULL) {
				1571	xmlGenericError(xmlGenericErrorContext,
				1572	"xmlInitCharEncodingHandlers : out of memory !\n");
				1573	return;
				1574	}
				1575	xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
				1576	xmlUTF16LEHandler =
				1577	xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
				1578	xmlUTF16BEHandler =
				1579	xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
				1580	xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
				1581	xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
Daniel Veillard	2004242	2001-05-31 18:22:04 +0000	[diff] [blame]	1582	xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1583	#ifdef LIBXML_HTML_ENABLED
				1584	xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
				1585	#endif
				1586	}
				1587
				1588	/**
				1589	* xmlCleanupCharEncodingHandlers:
				1590	*
				1591	* Cleanup the memory allocated for the char encoding support, it
				1592	* unregisters all the encoding handlers and the aliases.
				1593	*/
				1594	void
				1595	xmlCleanupCharEncodingHandlers(void) {
				1596	xmlCleanupEncodingAliases();
				1597
				1598	if (handlers == NULL) return;
				1599
				1600	for (;nbCharEncodingHandler > 0;) {
				1601	nbCharEncodingHandler--;
				1602	if (handlers[nbCharEncodingHandler] != NULL) {
				1603	if (handlers[nbCharEncodingHandler]->name != NULL)
				1604	xmlFree(handlers[nbCharEncodingHandler]->name);
				1605	xmlFree(handlers[nbCharEncodingHandler]);
				1606	}
				1607	}
				1608	xmlFree(handlers);
				1609	handlers = NULL;
				1610	nbCharEncodingHandler = 0;
				1611	xmlDefaultCharEncodingHandler = NULL;
				1612	}
				1613
				1614	/**
				1615	* xmlRegisterCharEncodingHandler:
				1616	* @handler: the xmlCharEncodingHandlerPtr handler block
				1617	*
				1618	* Register the char encoding handler, surprizing, isn't it ?
				1619	*/
				1620	void
				1621	xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
				1622	if (handlers == NULL) xmlInitCharEncodingHandlers();
				1623	if (handler == NULL) {
				1624	xmlGenericError(xmlGenericErrorContext,
				1625	"xmlRegisterCharEncodingHandler: NULL handler !\n");
				1626	return;
				1627	}
				1628
				1629	if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
				1630	xmlGenericError(xmlGenericErrorContext,
				1631	"xmlRegisterCharEncodingHandler: Too many handler registered\n");
				1632	xmlGenericError(xmlGenericErrorContext,
				1633	"\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
				1634	return;
				1635	}
				1636	handlers[nbCharEncodingHandler++] = handler;
				1637	}
				1638
				1639	/**
				1640	* xmlGetCharEncodingHandler:
				1641	* @enc: an xmlCharEncoding value.
				1642	*
				1643	* Search in the registrered set the handler able to read/write that encoding.
				1644	*
				1645	* Returns the handler or NULL if not found
				1646	*/
				1647	xmlCharEncodingHandlerPtr
				1648	xmlGetCharEncodingHandler(xmlCharEncoding enc) {
				1649	xmlCharEncodingHandlerPtr handler;
				1650
				1651	if (handlers == NULL) xmlInitCharEncodingHandlers();
				1652	switch (enc) {
				1653	case XML_CHAR_ENCODING_ERROR:
				1654	return(NULL);
				1655	case XML_CHAR_ENCODING_NONE:
				1656	return(NULL);
				1657	case XML_CHAR_ENCODING_UTF8:
				1658	return(NULL);
				1659	case XML_CHAR_ENCODING_UTF16LE:
				1660	return(xmlUTF16LEHandler);
				1661	case XML_CHAR_ENCODING_UTF16BE:
				1662	return(xmlUTF16BEHandler);
				1663	case XML_CHAR_ENCODING_EBCDIC:
				1664	handler = xmlFindCharEncodingHandler("EBCDIC");
				1665	if (handler != NULL) return(handler);
				1666	handler = xmlFindCharEncodingHandler("ebcdic");
				1667	if (handler != NULL) return(handler);
				1668	break;
				1669	case XML_CHAR_ENCODING_UCS4BE:
				1670	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
				1671	if (handler != NULL) return(handler);
				1672	handler = xmlFindCharEncodingHandler("UCS-4");
				1673	if (handler != NULL) return(handler);
				1674	handler = xmlFindCharEncodingHandler("UCS4");
				1675	if (handler != NULL) return(handler);
				1676	break;
				1677	case XML_CHAR_ENCODING_UCS4LE:
				1678	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
				1679	if (handler != NULL) return(handler);
				1680	handler = xmlFindCharEncodingHandler("UCS-4");
				1681	if (handler != NULL) return(handler);
				1682	handler = xmlFindCharEncodingHandler("UCS4");
				1683	if (handler != NULL) return(handler);
				1684	break;
				1685	case XML_CHAR_ENCODING_UCS4_2143:
				1686	break;
				1687	case XML_CHAR_ENCODING_UCS4_3412:
				1688	break;
				1689	case XML_CHAR_ENCODING_UCS2:
				1690	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
				1691	if (handler != NULL) return(handler);
				1692	handler = xmlFindCharEncodingHandler("UCS-2");
				1693	if (handler != NULL) return(handler);
				1694	handler = xmlFindCharEncodingHandler("UCS2");
				1695	if (handler != NULL) return(handler);
				1696	break;
				1697
				1698	/*
				1699	* We used to keep ISO Latin encodings native in the
				1700	* generated data. This led to so many problems that
				1701	* this has been removed. One can still change this
				1702	* back by registering no-ops encoders for those
				1703	*/
				1704	case XML_CHAR_ENCODING_8859_1:
				1705	handler = xmlFindCharEncodingHandler("ISO-8859-1");
				1706	if (handler != NULL) return(handler);
				1707	break;
				1708	case XML_CHAR_ENCODING_8859_2:
				1709	handler = xmlFindCharEncodingHandler("ISO-8859-2");
				1710	if (handler != NULL) return(handler);
				1711	break;
				1712	case XML_CHAR_ENCODING_8859_3:
				1713	handler = xmlFindCharEncodingHandler("ISO-8859-3");
				1714	if (handler != NULL) return(handler);
				1715	break;
				1716	case XML_CHAR_ENCODING_8859_4:
				1717	handler = xmlFindCharEncodingHandler("ISO-8859-4");
				1718	if (handler != NULL) return(handler);
				1719	break;
				1720	case XML_CHAR_ENCODING_8859_5:
				1721	handler = xmlFindCharEncodingHandler("ISO-8859-5");
				1722	if (handler != NULL) return(handler);
				1723	break;
				1724	case XML_CHAR_ENCODING_8859_6:
				1725	handler = xmlFindCharEncodingHandler("ISO-8859-6");
				1726	if (handler != NULL) return(handler);
				1727	break;
				1728	case XML_CHAR_ENCODING_8859_7:
				1729	handler = xmlFindCharEncodingHandler("ISO-8859-7");
				1730	if (handler != NULL) return(handler);
				1731	break;
				1732	case XML_CHAR_ENCODING_8859_8:
				1733	handler = xmlFindCharEncodingHandler("ISO-8859-8");
				1734	if (handler != NULL) return(handler);
				1735	break;
				1736	case XML_CHAR_ENCODING_8859_9:
				1737	handler = xmlFindCharEncodingHandler("ISO-8859-9");
				1738	if (handler != NULL) return(handler);
				1739	break;
				1740
				1741
				1742	case XML_CHAR_ENCODING_2022_JP:
				1743	handler = xmlFindCharEncodingHandler("ISO-2022-JP");
				1744	if (handler != NULL) return(handler);
				1745	break;
				1746	case XML_CHAR_ENCODING_SHIFT_JIS:
				1747	handler = xmlFindCharEncodingHandler("SHIFT-JIS");
				1748	if (handler != NULL) return(handler);
				1749	handler = xmlFindCharEncodingHandler("SHIFT_JIS");
				1750	if (handler != NULL) return(handler);
				1751	handler = xmlFindCharEncodingHandler("Shift_JIS");
				1752	if (handler != NULL) return(handler);
				1753	break;
				1754	case XML_CHAR_ENCODING_EUC_JP:
				1755	handler = xmlFindCharEncodingHandler("EUC-JP");
				1756	if (handler != NULL) return(handler);
				1757	break;
				1758	default:
				1759	break;
				1760	}
				1761
				1762	#ifdef DEBUG_ENCODING
				1763	xmlGenericError(xmlGenericErrorContext,
				1764	"No handler found for encoding %d\n", enc);
				1765	#endif
				1766	return(NULL);
				1767	}
				1768
				1769	/**
Daniel Veillard	5e2dace	2001-07-18 19:30:27 +0000	[diff] [blame]	1770	* xmlFindCharEncodingHandler:
				1771	* @name: a string describing the char encoding.
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1772	*
				1773	* Search in the registrered set the handler able to read/write that encoding.
				1774	*
				1775	* Returns the handler or NULL if not found
				1776	*/
				1777	xmlCharEncodingHandlerPtr
				1778	xmlFindCharEncodingHandler(const char *name) {
				1779	const char *nalias;
				1780	const char *norig;
				1781	xmlCharEncoding alias;
				1782	#ifdef LIBXML_ICONV_ENABLED
				1783	xmlCharEncodingHandlerPtr enc;
				1784	iconv_t icv_in, icv_out;
				1785	#endif /* LIBXML_ICONV_ENABLED */
				1786	char upper[100];
				1787	int i;
				1788
				1789	if (handlers == NULL) xmlInitCharEncodingHandlers();
				1790	if (name == NULL) return(xmlDefaultCharEncodingHandler);
				1791	if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
				1792
				1793	/*
				1794	* Do the alias resolution
				1795	*/
				1796	norig = name;
				1797	nalias = xmlGetEncodingAlias(name);
				1798	if (nalias != NULL)
				1799	name = nalias;
				1800
				1801	/*
				1802	* Check first for directly registered encoding names
				1803	*/
				1804	for (i = 0;i < 99;i++) {
				1805	upper[i] = toupper(name[i]);
				1806	if (upper[i] == 0) break;
				1807	}
				1808	upper[i] = 0;
				1809
				1810	for (i = 0;i < nbCharEncodingHandler; i++)
				1811	if (!strcmp(upper, handlers[i]->name)) {
				1812	#ifdef DEBUG_ENCODING
				1813	xmlGenericError(xmlGenericErrorContext,
				1814	"Found registered handler for encoding %s\n", name);
				1815	#endif
				1816	return(handlers[i]);
				1817	}
				1818
				1819	#ifdef LIBXML_ICONV_ENABLED
				1820	/* check whether iconv can handle this */
				1821	icv_in = iconv_open("UTF-8", name);
				1822	icv_out = iconv_open(name, "UTF-8");
				1823	if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
				1824	enc = (xmlCharEncodingHandlerPtr)
				1825	xmlMalloc(sizeof(xmlCharEncodingHandler));
				1826	if (enc == NULL) {
				1827	iconv_close(icv_in);
				1828	iconv_close(icv_out);
				1829	return(NULL);
				1830	}
				1831	enc->name = xmlMemStrdup(name);
				1832	enc->input = NULL;
				1833	enc->output = NULL;
				1834	enc->iconv_in = icv_in;
				1835	enc->iconv_out = icv_out;
				1836	#ifdef DEBUG_ENCODING
				1837	xmlGenericError(xmlGenericErrorContext,
				1838	"Found iconv handler for encoding %s\n", name);
				1839	#endif
				1840	return enc;
				1841	} else if ((icv_in != (iconv_t) -1) \|\| icv_out != (iconv_t) -1) {
				1842	xmlGenericError(xmlGenericErrorContext,
				1843	"iconv : problems with filters for '%s'\n", name);
				1844	}
				1845	#endif /* LIBXML_ICONV_ENABLED */
				1846
				1847	#ifdef DEBUG_ENCODING
				1848	xmlGenericError(xmlGenericErrorContext,
				1849	"No handler found for encoding %s\n", name);
				1850	#endif
				1851
				1852	/*
				1853	* Fallback using the canonical names
				1854	*/
				1855	alias = xmlParseCharEncoding(norig);
				1856	if (alias != XML_CHAR_ENCODING_ERROR) {
				1857	const char* canon;
				1858	canon = xmlGetCharEncodingName(alias);
				1859	if ((canon != NULL) && (strcmp(name, canon))) {
				1860	return(xmlFindCharEncodingHandler(canon));
				1861	}
				1862	}
				1863
				1864	return(NULL);
				1865	}
				1866
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	1867	/************************************************************************
				1868	* *
				1869	* ICONV based generic conversion functions *
				1870	* *
				1871	************************************************************************/
				1872
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1873	#ifdef LIBXML_ICONV_ENABLED
				1874	/**
				1875	* xmlIconvWrapper:
				1876	* @cd: iconv converter data structure
				1877	* @out: a pointer to an array of bytes to store the result
				1878	* @outlen: the length of @out
				1879	* @in: a pointer to an array of ISO Latin 1 chars
				1880	* @inlen: the length of @in
				1881	*
				1882	* Returns 0 if success, or
				1883	* -1 by lack of space, or
				1884	* -2 if the transcoding fails (for *in is not valid utf8 string or
				1885	* the result of transformation can't fit into the encoding we want), or
				1886	* -3 if there the last byte can't form a single output char.
				1887	*
				1888	* The value of @inlen after return is the number of octets consumed
				1889	* as the return value is positive, else unpredictiable.
				1890	* The value of @outlen after return is the number of ocetes consumed.
				1891	*/
				1892	static int
				1893	xmlIconvWrapper(iconv_t cd,
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1894	unsigned char out, int outlen,
				1895	const unsigned char in, int inlen) {
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1896
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1897	size_t icv_inlen = inlen, icv_outlen = outlen;
				1898	const char icv_in = (const char ) in;
				1899	char icv_out = (char ) out;
				1900	int ret;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1901
Darin Adler	699613b	2001-07-27 22:47:14 +0000	[diff] [blame]	1902	ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1903	if (in != NULL) {
				1904	*inlen -= icv_inlen;
				1905	*outlen -= icv_outlen;
				1906	} else {
				1907	*inlen = 0;
				1908	*outlen = 0;
				1909	}
				1910	if ((icv_inlen != 0) \|\| (ret == -1)) {
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1911	#ifdef EILSEQ
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1912	if (errno == EILSEQ) {
				1913	return -2;
				1914	} else
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1915	#endif
				1916	#ifdef E2BIG
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1917	if (errno == E2BIG) {
				1918	return -1;
				1919	} else
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1920	#endif
				1921	#ifdef EINVAL
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1922	if (errno == EINVAL) {
				1923	return -3;
				1924	} else
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1925	#endif
Daniel Veillard	9403a04	2001-05-28 11:00:53 +0000	[diff] [blame]	1926	{
				1927	return -3;
				1928	}
				1929	}
				1930	return 0;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1931	}
				1932	#endif /* LIBXML_ICONV_ENABLED */
				1933
Daniel Veillard	97ac131	2001-05-30 19:14:17 +0000	[diff] [blame]	1934	/************************************************************************
				1935	* *
				1936	* The real API used by libxml for on-the-fly conversion *
				1937	* *
				1938	************************************************************************/
				1939
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	1940	/**
				1941	* xmlCharEncFirstLine:
				1942	* @handler: char enconding transformation data structure
				1943	* @out: an xmlBuffer for the output.
				1944	* @in: an xmlBuffer for the input
				1945	*
				1946	* Front-end for the encoding handler input function, but handle only
				1947	* the very first line, i.e. limit itself to 45 chars.
				1948	*
				1949	* Returns the number of byte written if success, or
				1950	* -1 general error
				1951	* -2 if the transcoding fails (for *in is not valid utf8 string or
				1952	* the result of transformation can't fit into the encoding we want), or
				1953	*/
				1954	int
				1955	xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
				1956	xmlBufferPtr in) {
				1957	int ret = -2;
				1958	int written;
				1959	int toconv;
				1960
				1961	if (handler == NULL) return(-1);
				1962	if (out == NULL) return(-1);
				1963	if (in == NULL) return(-1);
				1964
				1965	written = out->size - out->use;
				1966	toconv = in->use;
				1967	if (toconv * 2 >= written) {
				1968	xmlBufferGrow(out, toconv);
				1969	written = out->size - out->use - 1;
				1970	}
				1971
				1972	/*
				1973	* echo '<?xml version="1.0" encoding="UCS4"?>' \| wc -c => 38
				1974	* 45 chars should be sufficient to reach the end of the encoding
				1975	* decalration without going too far inside the document content.
				1976	*/
				1977	written = 45;
				1978
				1979	if (handler->input != NULL) {
				1980	ret = handler->input(&out->content[out->use], &written,
				1981	in->content, &toconv);
				1982	xmlBufferShrink(in, toconv);
				1983	out->use += written;
				1984	out->content[out->use] = 0;
				1985	}
				1986	#ifdef LIBXML_ICONV_ENABLED
				1987	else if (handler->iconv_in != NULL) {
				1988	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
				1989	&written, in->content, &toconv);
				1990	xmlBufferShrink(in, toconv);
				1991	out->use += written;
				1992	out->content[out->use] = 0;
				1993	if (ret == -1) ret = -3;
				1994	}
				1995	#endif /* LIBXML_ICONV_ENABLED */
				1996	#ifdef DEBUG_ENCODING
				1997	switch (ret) {
				1998	case 0:
				1999	xmlGenericError(xmlGenericErrorContext,
				2000	"converted %d bytes to %d bytes of input\n",
				2001	toconv, written);
				2002	break;
				2003	case -1:
				2004	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
				2005	toconv, written, in->use);
				2006	break;
				2007	case -2:
				2008	xmlGenericError(xmlGenericErrorContext,
				2009	"input conversion failed due to input error\n");
				2010	break;
				2011	case -3:
				2012	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
				2013	toconv, written, in->use);
				2014	break;
				2015	default:
				2016	xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
				2017	}
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2018	#endif /* DEBUG_ENCODING */
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2019	/*
				2020	* Ignore when input buffer is not on a boundary
				2021	*/
				2022	if (ret == -3) ret = 0;
				2023	if (ret == -1) ret = 0;
				2024	return(ret);
				2025	}
				2026
				2027	/**
				2028	* xmlCharEncInFunc:
				2029	* @handler: char enconding transformation data structure
				2030	* @out: an xmlBuffer for the output.
				2031	* @in: an xmlBuffer for the input
				2032	*
				2033	* Generic front-end for the encoding handler input function
				2034	*
				2035	* Returns the number of byte written if success, or
				2036	* -1 general error
				2037	* -2 if the transcoding fails (for *in is not valid utf8 string or
				2038	* the result of transformation can't fit into the encoding we want), or
				2039	*/
				2040	int
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2041	xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
				2042	xmlBufferPtr in)
				2043	{
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2044	int ret = -2;
				2045	int written;
				2046	int toconv;
				2047
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2048	if (handler == NULL)
				2049	return (-1);
				2050	if (out == NULL)
				2051	return (-1);
				2052	if (in == NULL)
				2053	return (-1);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2054
				2055	toconv = in->use;
				2056	if (toconv == 0)
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2057	return (0);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2058	written = out->size - out->use;
				2059	if (toconv * 2 >= written) {
				2060	xmlBufferGrow(out, out->size + toconv * 2);
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2061	written = out->size - out->use - 1;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2062	}
				2063	if (handler->input != NULL) {
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2064	ret = handler->input(&out->content[out->use], &written,
				2065	in->content, &toconv);
				2066	xmlBufferShrink(in, toconv);
				2067	out->use += written;
				2068	out->content[out->use] = 0;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2069	}
				2070	#ifdef LIBXML_ICONV_ENABLED
				2071	else if (handler->iconv_in != NULL) {
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2072	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
				2073	&written, in->content, &toconv);
				2074	xmlBufferShrink(in, toconv);
				2075	out->use += written;
				2076	out->content[out->use] = 0;
				2077	if (ret == -1)
				2078	ret = -3;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2079	}
				2080	#endif /* LIBXML_ICONV_ENABLED */
				2081	switch (ret) {
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2082	case 0:
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2083	#ifdef DEBUG_ENCODING
				2084	xmlGenericError(xmlGenericErrorContext,
				2085	"converted %d bytes to %d bytes of input\n",
				2086	toconv, written);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2087	#endif
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2088	break;
				2089	case -1:
				2090	#ifdef DEBUG_ENCODING
				2091	xmlGenericError(xmlGenericErrorContext,
				2092	"converted %d bytes to %d bytes of input, %d left\n",
				2093	toconv, written, in->use);
				2094	#endif
				2095	break;
				2096	case -3:
				2097	#ifdef DEBUG_ENCODING
				2098	xmlGenericError(xmlGenericErrorContext,
				2099	"converted %d bytes to %d bytes of input, %d left\n",
				2100	toconv, written, in->use);
				2101	#endif
				2102	break;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2103	case -2:
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2104	xmlGenericError(xmlGenericErrorContext,
				2105	"input conversion failed due to input error\n");
				2106	xmlGenericError(xmlGenericErrorContext,
				2107	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				2108	in->content[0], in->content[1],
				2109	in->content[2], in->content[3]);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2110	}
				2111	/*
				2112	* Ignore when input buffer is not on a boundary
				2113	*/
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2114	if (ret == -3)
				2115	ret = 0;
				2116	return (ret);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2117	}
				2118
				2119	/**
				2120	* xmlCharEncOutFunc:
				2121	* @handler: char enconding transformation data structure
				2122	* @out: an xmlBuffer for the output.
				2123	* @in: an xmlBuffer for the input
				2124	*
				2125	* Generic front-end for the encoding handler output function
				2126	* a first call with @in == NULL has to be made firs to initiate the
				2127	* output in case of non-stateless encoding needing to initiate their
				2128	* state or the output (like the BOM in UTF16).
				2129	* In case of UTF8 sequence conversion errors for the given encoder,
				2130	* the content will be automatically remapped to a CharRef sequence.
				2131	*
				2132	* Returns the number of byte written if success, or
				2133	* -1 general error
				2134	* -2 if the transcoding fails (for *in is not valid utf8 string or
				2135	* the result of transformation can't fit into the encoding we want), or
				2136	*/
				2137	int
				2138	xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
				2139	xmlBufferPtr in) {
				2140	int ret = -2;
				2141	int written;
				2142	int writtentot = 0;
				2143	int toconv;
				2144	int output = 0;
				2145
				2146	if (handler == NULL) return(-1);
				2147	if (out == NULL) return(-1);
				2148
				2149	retry:
				2150
				2151	written = out->size - out->use;
				2152
				2153	/*
				2154	* First specific handling of in = NULL, i.e. the initialization call
				2155	*/
				2156	if (in == NULL) {
				2157	toconv = 0;
				2158	if (handler->output != NULL) {
				2159	ret = handler->output(&out->content[out->use], &written,
				2160	NULL, &toconv);
				2161	out->use += written;
				2162	out->content[out->use] = 0;
				2163	}
				2164	#ifdef LIBXML_ICONV_ENABLED
				2165	else if (handler->iconv_out != NULL) {
				2166	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
				2167	&written, NULL, &toconv);
				2168	out->use += written;
				2169	out->content[out->use] = 0;
				2170	}
				2171	#endif /* LIBXML_ICONV_ENABLED */
				2172	#ifdef DEBUG_ENCODING
				2173	xmlGenericError(xmlGenericErrorContext,
				2174	"initialized encoder\n");
				2175	#endif
				2176	return(0);
				2177	}
				2178
				2179	/*
				2180	* Convertion itself.
				2181	*/
				2182	toconv = in->use;
				2183	if (toconv == 0)
				2184	return(0);
				2185	if (toconv * 2 >= written) {
				2186	xmlBufferGrow(out, toconv * 2);
				2187	written = out->size - out->use - 1;
				2188	}
				2189	if (handler->output != NULL) {
				2190	ret = handler->output(&out->content[out->use], &written,
				2191	in->content, &toconv);
				2192	xmlBufferShrink(in, toconv);
				2193	out->use += written;
				2194	writtentot += written;
				2195	out->content[out->use] = 0;
				2196	}
				2197	#ifdef LIBXML_ICONV_ENABLED
				2198	else if (handler->iconv_out != NULL) {
				2199	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
				2200	&written, in->content, &toconv);
				2201	xmlBufferShrink(in, toconv);
				2202	out->use += written;
				2203	writtentot += written;
				2204	out->content[out->use] = 0;
				2205	if (ret == -1) {
				2206	if (written > 0) {
				2207	/*
				2208	* Can be a limitation of iconv
				2209	*/
				2210	goto retry;
				2211	}
				2212	ret = -3;
				2213	}
				2214	}
				2215	#endif /* LIBXML_ICONV_ENABLED */
				2216	else {
				2217	xmlGenericError(xmlGenericErrorContext,
				2218	"xmlCharEncOutFunc: no output function !\n");
				2219	return(-1);
				2220	}
				2221
				2222	if (ret >= 0) output += ret;
				2223
				2224	/*
				2225	* Attempt to handle error cases
				2226	*/
				2227	switch (ret) {
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2228	case 0:
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2229	#ifdef DEBUG_ENCODING
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2230	xmlGenericError(xmlGenericErrorContext,
				2231	"converted %d bytes to %d bytes of output\n",
				2232	toconv, written);
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2233	#endif
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2234	break;
				2235	case -1:
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2236	#ifdef DEBUG_ENCODING
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2237	xmlGenericError(xmlGenericErrorContext,
				2238	"output conversion failed by lack of space\n");
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2239	#endif
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2240	break;
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2241	case -3:
				2242	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
				2243	toconv, written, in->use);
				2244	break;
				2245	case -2: {
				2246	int len = in->use;
				2247	const xmlChar utf = (const xmlChar ) in->content;
				2248	int cur;
				2249
				2250	cur = xmlGetUTF8Char(utf, &len);
				2251	if (cur > 0) {
				2252	xmlChar charref[20];
				2253
				2254	#ifdef DEBUG_ENCODING
				2255	xmlGenericError(xmlGenericErrorContext,
				2256	"handling output conversion error\n");
				2257	xmlGenericError(xmlGenericErrorContext,
				2258	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				2259	in->content[0], in->content[1],
				2260	in->content[2], in->content[3]);
				2261	#endif
				2262	/*
				2263	* Removes the UTF8 sequence, and replace it by a charref
				2264	* and continue the transcoding phase, hoping the error
				2265	* did not mangle the encoder state.
				2266	*/
Daniel Veillard	1669828	2001-09-14 10:29:27 +0000	[diff] [blame]	2267	sprintf((char *) charref, "&#%d;", cur);
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2268	xmlBufferShrink(in, len);
				2269	xmlBufferAddHead(in, charref, -1);
				2270
				2271	goto retry;
				2272	} else {
				2273	xmlGenericError(xmlGenericErrorContext,
				2274	"output conversion failed due to conv error\n");
				2275	xmlGenericError(xmlGenericErrorContext,
				2276	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				2277	in->content[0], in->content[1],
				2278	in->content[2], in->content[3]);
				2279	in->content[0] = ' ';
				2280	}
				2281	break;
				2282	}
				2283	}
				2284	return(ret);
				2285	}
				2286
				2287	/**
				2288	* xmlCharEncCloseFunc:
				2289	* @handler: char enconding transformation data structure
				2290	*
				2291	* Generic front-end for hencoding handler close function
				2292	*
				2293	* Returns 0 if success, or -1 in case of error
				2294	*/
				2295	int
				2296	xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
				2297	int ret = 0;
				2298	if (handler == NULL) return(-1);
				2299	if (handler->name == NULL) return(-1);
				2300	#ifdef LIBXML_ICONV_ENABLED
				2301	/*
				2302	* Iconv handlers can be oused only once, free the whole block.
				2303	* and the associated icon resources.
				2304	*/
				2305	if ((handler->iconv_out != NULL) \|\| (handler->iconv_in != NULL)) {
				2306	if (handler->name != NULL)
				2307	xmlFree(handler->name);
				2308	handler->name = NULL;
				2309	if (handler->iconv_out != NULL) {
				2310	if (iconv_close(handler->iconv_out))
				2311	ret = -1;
				2312	handler->iconv_out = NULL;
				2313	}
				2314	if (handler->iconv_in != NULL) {
				2315	if (iconv_close(handler->iconv_in))
				2316	ret = -1;
				2317	handler->iconv_in = NULL;
				2318	}
				2319	xmlFree(handler);
				2320	}
				2321	#endif /* LIBXML_ICONV_ENABLED */
				2322	#ifdef DEBUG_ENCODING
				2323	if (ret)
				2324	xmlGenericError(xmlGenericErrorContext,
				2325	"failed to close the encoding handler\n");
				2326	else
				2327	xmlGenericError(xmlGenericErrorContext,
				2328	"closed the encoding handler\n");
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2329	#endif
Daniel Veillard	d79bcd1	2001-06-21 22:07:42 +0000	[diff] [blame]	2330
Owen Taylor	3473f88	2001-02-23 17:55:21 +0000	[diff] [blame]	2331	return(ret);
				2332	}
				2333