Blame - encoding.c - fp2-dev/platform/external/libxml2

blob: 3d65469edd6ad269f58901b5938f0ae065660980 [file] [log] [blame]

Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	1	/*
				2	* encoding.c : implements the encoding conversion functions needed for XML
				3	*
				4	* Related specs:
				5	* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
				6	* [ISO-10646] UTF-8 and UTF-16 in Annexes
				7	* [ISO-8859-1] ISO Latin-1 characters codes.
				8	* [UNICODE] The Unicode Consortium, "The Unicode Standard --
				9	* Worldwide Character Encoding -- Version 1.0", Addison-
				10	* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
				11	* described in Unicode Technical Report #4.
				12	* [US-ASCII] Coded Character Set--7-bit American Standard Code for
				13	* Information Interchange, ANSI X3.4-1986.
				14	*
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	15	* Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	16	*
				17	* See Copyright for the status of this software.
				18	*
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	19	* Daniel.Veillard@w3.org
				20	*/
				21
#include "config.h"
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "encoding.h"
#ifdef HAVE_UNICODE_H
#include <unicode.h>
#endif
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	33
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	34	#ifdef HAVE_UNICODE_H
				35
				36	#else /* ! HAVE_UNICODE_H */
Daniel Veillard	0ba4d53	1998-11-01 19:34:31 +0000	[diff] [blame]	37	/*
				38	* From rfc2044: encoding of the Unicode values on UTF-8:
				39	*
				40	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				41	* 0000 0000-0000 007F 0xxxxxxx
				42	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				43	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				44	*
				45	* I hope we won't use values > 0xFFFF anytime soon !
				46	*/
				47
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, every other
 * byte is expanded to a two byte UTF-8 sequence.
 *
 * Returns the number of bytes written, or -1 by lack of space.
 */
int
isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
{
    unsigned char* const first = out;
    unsigned char* const limit = out + outlen;
    unsigned char* const stop = in + inlen;

    for ( ; in < stop; in++) {
        const unsigned char ch = *in;

        /* both the one byte and the two byte form need a first slot */
        if (out >= limit) return -1;
        if (ch < 0x80) {
            *out++ = ch;
            continue;
        }

        /* 110000xx 10xxxxxx */
        *out++ = 0xC0 | (ch >> 6);
        if (out >= limit) return -1;
        *out++ = 0x80 | (0x3F & ch);
    }
    return out - first;
}
				82
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Only ASCII bytes and the two byte sequences led by
 * 0xC2 or 0xC3 (code points 0x80 to 0xFF) can be represented.
 * TODO: UTF8Toisolat1 need a fallback mechanism ...
 *
 * Returns the number of bytes written, or -1 by lack of space, or -2
 *     if the transcoding failed (char outside of ISO Latin 1, truncated
 *     or malformed sequence).
 */
int
UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen)
{
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
    unsigned char* inend= in+inlen;
    unsigned char c;

    while (in < inend) {
        c= *in++;
        if (c < 0x80) {
            if (out >= outend) return -1;
            *out++= c;
        }
        else if (((c & 0xFE) == 0xC2) && in<inend) {
            if (out >= outend) return -1;
            /* the second byte must be a 10xxxxxx continuation byte */
            if ((*in & 0xC0) != 0x80) return -2;
            *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
        }
        else return -2;
    }
    return out-outstart;
}
				119
/**
 * UTF16ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-16 chars (array of unsigned shorts)
 * @inlen:  the length of @in (number of shorts)
 *
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 * block of chars out. A high surrogate followed by a low surrogate is
 * combined into a single value before being encoded.
 *
 * Returns the number of bytes written, or -1 by lack of space or if a
 *     high surrogate is not followed by a low surrogate.
 */
int
UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)
{
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
    unsigned short* inend= in+inlen;
    unsigned int c, d;
    int bits;

    while (in < inend) {
        c= *in++;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if ((in<inend) && (((d=*in++) & 0xFC00) == 0xDC00)) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else return -1;
        }

        /* assertion: c is a single UCS-4 value */

        /*
         * Emit the leading byte; bits is the shift of the first
         * continuation byte, negative when there is none.
         */
        if (out >= outend) return -1;
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= (c >>  6) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= (c >> 12) | 0xE0;  bits=  6; }
        else                  {  *out++= (c >> 18) | 0xF0;  bits= 12; }

        /* continuation bytes: 10xxxxxx, most significant group first */
        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend) return -1;
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
    }
    return out-outstart;
}
				167
/**
 * UTF8ToUTF16:
 * @out:  a pointer to an array of shorts to store the result
 * @outlen:  the length of @out (number of shorts)
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. Values of 0x10000 and above are written as a
 * surrogate pair.
 * TODO: UTF8ToUTF16 need a fallback mechanism ...
 *
 * Returns the number of shorts written, or -2 if a leading byte is
 *     invalid, or -1 by lack of space, on a truncated or malformed
 *     trailing byte, or for a value of 0x110000 and above.
 */
int
UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
{
    unsigned short* const first = out;
    unsigned short* const limit = out + outlen;
    unsigned char* const stop = in + inlen;

    while (in < stop) {
        unsigned int lead = *in++;
        unsigned int code;
        unsigned int extra;
        unsigned int next;

        /* classify the leading byte: payload bits + number of followers */
        if (lead < 0x80) {
            code = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            return -2;          /* trailing byte in leading position */
        } else if (lead < 0xE0) {
            code = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            code = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            code = lead & 0x07;
            extra = 3;
        } else {
            return -2;          /* no chance for this in UTF-16 */
        }

        /* accumulate six payload bits from each trailing byte */
        while (extra != 0) {
            if (in >= stop) return -1;
            next = *in++;
            if ((next & 0xC0) != 0x80) return -1;
            code = (code << 6) | (next & 0x3F);
            extra--;
        }

        /* code is now a single UCS-4 value */
        if (code >= 0x110000) return -1;
        if (code < 0x10000) {
            if (out >= limit) return -1;
            *out++ = code;
            continue;
        }

        /* a surrogate pair needs two free slots */
        if (limit - out < 2) return -1;
        code -= 0x10000;
        *out++ = 0xD800 | (code >> 10);
        *out++ = 0xDC00 | (code & 0x03FF);
    }
    return out - first;
}
				220
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	221	#endif /* ! HAVE_UNICODE_H */
Daniel Veillard	97b5877	1998-10-20 06:14:16 +0000	[diff] [blame]	222
Daniel Veillard	27d8874	1999-05-29 11:51:49 +0000	[diff] [blame]	223	/**
				224	* xmlDetectCharEncoding:
				225	* @in: a pointer to the first bytes of the XML entity, must be at least
				226	* 4 bytes long.
				227	*
				228	* Guess the encoding of the entity using the first bytes of the entity content
				229	* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
				230	*
				231	* Returns one of the XML_CHAR_ENCODING_... values.
				232	*/
				233	xmlCharEncoding
Daniel Veillard	011b63c	1999-06-02 17:44:04 +0000	[diff] [blame]	234	xmlDetectCharEncoding(const unsigned char* in)
Daniel Veillard	27d8874	1999-05-29 11:51:49 +0000	[diff] [blame]	235	{
				236	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				237	(in[2] == 0x00) && (in[3] == 0x3C))
				238	return(XML_CHAR_ENCODING_UCS4BE);
				239	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
				240	(in[2] == 0x00) && (in[3] == 0x00))
				241	return(XML_CHAR_ENCODING_UCS4LE);
				242	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				243	(in[2] == 0x3C) && (in[3] == 0x00))
				244	return(XML_CHAR_ENCODING_UCS4_2143);
				245	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
				246	(in[2] == 0x00) && (in[3] == 0x00))
				247	return(XML_CHAR_ENCODING_UCS4_3412);
				248	if ((in[0] == 0xFE) && (in[1] == 0xFF))
				249	return(XML_CHAR_ENCODING_UTF16BE);
				250	if ((in[0] == 0xFF) && (in[1] == 0xFE))
				251	return(XML_CHAR_ENCODING_UTF16LE);
				252	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
				253	(in[2] == 0xA7) && (in[3] == 0x94))
				254	return(XML_CHAR_ENCODING_EBCDIC);
				255	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
				256	(in[2] == 0x78) && (in[3] == 0x6D))
				257	return(XML_CHAR_ENCODING_UTF8);
				258	return(XML_CHAR_ENCODING_NONE);
				259	}
				260
				261	/**
				262	* xmlParseCharEncoding:
				263	* @name: the encoding name as parsed, in UTF-8 format (ASCCI actually)
				264	*
				265	* Conpare the string to the known encoding schemes already known. Note
				266	* that the comparison is case insensitive accordingly to the section
				267	* [XML] 4.3.3 Character Encoding in Entities.
				268	*
				269	* Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
				270	* if not recognized.
				271	*/
				272	xmlCharEncoding
Daniel Veillard	011b63c	1999-06-02 17:44:04 +0000	[diff] [blame]	273	xmlParseCharEncoding(const char* name)
Daniel Veillard	27d8874	1999-05-29 11:51:49 +0000	[diff] [blame]	274	{
				275	char upper[500];
				276	int i;
				277
				278	for (i = 0;i < 499;i++) {
				279	upper[i] = toupper(name[i]);
				280	if (upper[i] == 0) break;
				281	}
				282	upper[i] = 0;
				283
				284	if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
				285	if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
				286	if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
				287
				288	/*
				289	* NOTE: if we were able to parse this, the endianness of UTF16 is
				290	* already found and in use
				291	*/
				292	if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
				293	if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
				294
				295	if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				296	if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				297	if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
				298
				299	/*
				300	* NOTE: if we were able to parse this, the endianness of UCS4 is
				301	* already found and in use
				302	*/
				303	if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				304	if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				305	if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
				306
				307
				308	if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
				309	if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
				310	if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
				311
				312	if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
				313	if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
				314	if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
				315
				316	if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
				317	if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
				318	if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
				319	if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
				320	if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
				321	if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
				322	if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
				323
				324	if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
				325	if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
				326	if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
				327	return(XML_CHAR_ENCODING_ERROR);
				328	}
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	329
				330	/****************************************************************
				331	* *
				332	* Char encoding handlers *
				333	* *
				334	****************************************************************/
				335
/*
 * Table of the registered encoding handlers. It is allocated with room
 * for MAX_ENCODING_HANDLERS pointers by xmlInitCharEncodingHandlers() and
 * filled by xmlRegisterCharEncodingHandler().
 */
/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
/* NULL until xmlInitCharEncodingHandlers() allocated the table */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of slots of the table currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
				347
				348	/**
				349	* xmlNewCharEncodingHandler:
				350	* @name: the encoding name, in UTF-8 format (ASCCI actually)
				351	* @input: the xmlCharEncodingInputFunc to read that encoding
				352	* @output: the xmlCharEncodingOutputFunc to write that encoding
				353	*
				354	* Create and registers an xmlCharEncodingHandler.
				355	* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
				356	*/
				357	xmlCharEncodingHandlerPtr
				358	xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
				359	xmlCharEncodingOutputFunc output) {
				360	xmlCharEncodingHandlerPtr handler;
				361	char upper[500];
				362	int i;
				363	char *up = 0;
				364
				365	/*
				366	* Keep only the uppercase version of the encoding.
				367	*/
				368	if (name == NULL) {
				369	fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n");
				370	return(NULL);
				371	}
				372	for (i = 0;i < 499;i++) {
				373	upper[i] = toupper(name[i]);
				374	if (upper[i] == 0) break;
				375	}
				376	upper[i] = 0;
				377	up = strdup(upper);
				378	if (up == NULL) {
				379	fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
				380	return(NULL);
				381	}
				382
				383	/*
				384	* allocate and fill-up an handler block.
				385	*/
				386	handler = (xmlCharEncodingHandlerPtr)
				387	malloc(sizeof(xmlCharEncodingHandler));
				388	if (handler == NULL) {
				389	fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
				390	return(NULL);
				391	}
				392	handler->input = input;
				393	handler->output = output;
				394	handler->name = up;
				395
				396	/*
				397	* registers and returns the handler.
				398	*/
				399	xmlRegisterCharEncodingHandler(handler);
				400	return(handler);
				401	}
				402
				403	/**
				404	* xmlInitCharEncodingHandlers:
				405	*
				406	* Initialize the char encoding support, it registers the default
				407	* encoding supported.
				408	* NOTE: while public theis function usually don't need to be called
				409	* in normal processing.
				410	*/
				411	void
				412	xmlInitCharEncodingHandlers(void) {
				413	if (handlers != NULL) return;
				414
				415	handlers = (xmlCharEncodingHandlerPtr *)
				416	malloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
				417
				418	if (handlers == NULL) {
				419	fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
				420	return;
				421	}
				422	xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	423	#ifdef HAVE_UNICODE_H
				424	#else
Daniel Veillard	b96e643	1999-08-29 21:02:19 +0000	[diff] [blame^]	425	/* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	426	xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	427	#endif
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	428	}
				429
				430	/**
				431	* xmlRegisterCharEncodingHandler:
				432	* @handler: the xmlCharEncodingHandlerPtr handler block
				433	*
				434	* Register the char encoding handler, surprizing, isn't it ?
				435	*/
				436	void
				437	xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
				438	if (handlers == NULL) xmlInitCharEncodingHandlers();
				439	if (handler == NULL) {
				440	fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n");
				441	return;
				442	}
				443
				444	if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
				445	fprintf(stderr,
				446	"xmlRegisterCharEncodingHandler: Too many handler registered\n");
				447	fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
				448	return;
				449	}
				450	handlers[nbCharEncodingHandler++] = handler;
				451	}
				452
				453	/**
				454	* xmlGetCharEncodingHandler:
				455	* @enc: an xmlCharEncoding value.
				456	*
				457	* Search in the registrered set the handler able to read/write that encoding.
				458	*
				459	* Returns the handler or NULL if not found
				460	*/
				461	xmlCharEncodingHandlerPtr
				462	xmlGetCharEncodingHandler(xmlCharEncoding enc) {
				463	if (handlers == NULL) xmlInitCharEncodingHandlers();
Daniel Veillard	b96e643	1999-08-29 21:02:19 +0000	[diff] [blame^]	464	/* TODO xmlGetCharEncodingHandler !!!!!!! */
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	465	return(NULL);
				466	}
				467
				468	/**
				469	* xmlGetCharEncodingHandler:
				470	* @enc: a string describing the char encoding.
				471	*
				472	* Search in the registrered set the handler able to read/write that encoding.
				473	*
				474	* Returns the handler or NULL if not found
				475	*/
				476	xmlCharEncodingHandlerPtr
				477	xmlFindCharEncodingHandler(const char *name) {
				478	char upper[500];
				479	int i;
				480
				481	if (handlers == NULL) xmlInitCharEncodingHandlers();
				482	if (name == NULL) return(xmlDefaultCharEncodingHandler);
				483	if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
				484
				485	for (i = 0;i < 499;i++) {
				486	upper[i] = toupper(name[i]);
				487	if (upper[i] == 0) break;
				488	}
				489	upper[i] = 0;
				490
				491	for (i = 0;i < nbCharEncodingHandler; i++)
				492	if (!strcmp(name, handlers[i]->name))
				493	return(handlers[i]);
				494
				495	return(NULL);
				496	}
				497