Blame - encoding.c - fp2-dev/platform/external/libxml2

blob: 5169cdea493a13b037e0538fe7b2484f13505bdd [file] [log] [blame]

Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	1	/*
				2	* encoding.c : implements the encoding conversion functions needed for XML
				3	*
				4	* Related specs:
				5	* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
				6	* [ISO-10646] UTF-8 and UTF-16 in Annexes
				7	* [ISO-8859-1] ISO Latin-1 characters codes.
				8	* [UNICODE] The Unicode Consortium, "The Unicode Standard --
				9	* Worldwide Character Encoding -- Version 1.0", Addison-
				10	* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
				11	* described in Unicode Technical Report #4.
				12	* [US-ASCII] Coded Character Set--7-bit American Standard Code for
				13	* Information Interchange, ANSI X3.4-1986.
				14	*
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	15	* Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	16	*
				17	* See Copyright for the status of this software.
				18	*
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	19	* Daniel.Veillard@w3.org
				20	*/
				21
Daniel Veillard	7f7d111	1999-09-22 09:46:25 +0000	[diff] [blame]	22	#ifndef WIN32
Daniel Veillard	b96e643	1999-08-29 21:02:19 +0000	[diff] [blame]	23	#include "config.h"
Daniel Veillard	7f7d111	1999-09-22 09:46:25 +0000	[diff] [blame]	24	#endif
				25
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	26	#include <stdio.h>
Daniel Veillard	7f7d111	1999-09-22 09:46:25 +0000	[diff] [blame]	27	#include <string.h>
				28
				29	#ifdef HAVE_CTYPE_H
				30	#include <ctype.h>
				31	#endif
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	32	#include "encoding.h"
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	33	#ifdef HAVE_UNICODE_H
				34	#include <unicode.h>
				35	#endif
Daniel Veillard	6454aec	1999-09-02 22:04:43 +0000	[diff] [blame]	36	#include "xmlmemory.h"
Daniel Veillard	891e404	1998-10-19 00:43:02 +0000	[diff] [blame]	37
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	38	#ifdef HAVE_UNICODE_H
				39
				40	#else /* ! HAVE_UNICODE_H */
Daniel Veillard	0ba4d53	1998-11-01 19:34:31 +0000	[diff] [blame]	41	/*
				42	* From rfc2044: encoding of the Unicode values on UTF-8:
				43	*
				44	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				45	* 0000 0000-0000 007F 0xxxxxxx
				46	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				47	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				48	*
				49	* I hope we won't use values > 0xFFFF anytime soon !
				50	*/
				51
/**
 * isolat1ToUTF8:
 * @out:  buffer receiving the UTF-8 result
 * @outlen:  capacity of @out, in bytes
 * @in:  buffer holding the ISO Latin 1 input
 * @inlen:  number of bytes available in @in
 *
 * Converts a block of ISO Latin 1 characters to UTF-8. Bytes below 0x80
 * are copied unchanged; every other byte expands to a two-byte sequence
 * (110000xx 10xxxxxx).
 *
 * Returns the number of bytes written, or -1 when @out is too small.
 */
int
isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
{
    unsigned char* dst = out;
    unsigned char* dstLimit = out + outlen;
    unsigned char* srcLimit = in + inlen;

    for ( ; in < srcLimit; in++) {
        unsigned char ch = *in;

        /* Both branches need at least one free byte for their first write. */
        if (dst >= dstLimit)
            return -1;
        if (ch < 0x80) {
            *dst++ = ch;
            continue;
        }
        /* Lead byte carries the two high bits, trail byte the low six. */
        *dst++ = 0xC0 | (ch >> 6);
        if (dst >= dstLimit)
            return -1;
        *dst++ = 0x80 | (0x3F & ch);
    }
    return dst - out;
}
				86
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Only code points up to U+00FF can be represented:
 * that is plain ASCII plus the two-byte sequences led by 0xC2 or 0xC3.
 * TODO: UTF8Toisolat1 needs a fallback mechanism ...
 *
 * Returns the number of bytes written, or -1 by lack of space, or -2
 *     if the transcoding failed (character outside Latin 1, truncated
 *     sequence, or trailing byte that is not a continuation byte).
 */
int
UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    unsigned char* inend = in + inlen;
    unsigned char c;

    while (in < inend) {
        c = *in++;
        if (c < 0x80) {
            if (out >= outend) return -1;
            *out++ = c;
        }
        /*
         * 0xC2/0xC3 lead bytes cover U+0080..U+00FF. The following byte
         * must exist and must be a continuation byte (10xxxxxx).
         */
        else if (((c & 0xFE) == 0xC2) && (in < inend) &&
                 ((*in & 0xC0) == 0x80)) {
            if (out >= outend) return -1;
            *out++ = ((c & 0x03) << 6) | (*in++ & 0x3F);
        }
        else return -2;
    }
    return out - outstart;
}
				123
/**
 * UTF16ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-16 chars (array of unsigned shorts)
 * @inlen:  the length of @in (number of shorts)
 *
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 * block of chars out. Surrogate pairs are combined into a single code
 * point and emitted as a four-byte sequence.
 *
 * Returns the number of bytes written, or -1 by lack of space or when
 *     the input contains an unpaired surrogate.
 */
int
UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    unsigned short* inend = in + inlen;
    unsigned int c, d;
    int bits;

    while (in < inend) {
        c = *in++;
        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if ((in < inend) && (((d = *in++) & 0xFC00) == 0xDC00)) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else return -1;
        }
        else if ((c & 0xFC00) == 0xDC00) {
            /* low surrogate without a preceding high surrogate */
            return -1;
        }

        /* assertion: c is a single UCS-4 value */

        /*
         * Emit the lead byte; bits is the shift of the first continuation
         * byte still to be written (negative when there is none).
         */
        if (out >= outend) return -1;
        if (c < 0x80)         { *out++ = c;                 bits = -6; }
        else if (c < 0x800)   { *out++ = (c >>  6) | 0xC0;  bits =  0; }
        else if (c < 0x10000) { *out++ = (c >> 12) | 0xE0;  bits =  6; }
        else                  { *out++ = (c >> 18) | 0xF0;  bits = 12; }

        /* Continuation bytes: six payload bits each, tagged 10xxxxxx. */
        for ( ; bits >= 0; bits -= 6) {
            if (out >= outend) return -1;
            *out++ = ((c >> bits) & 0x3F) | 0x80;
        }
    }
    return out - outstart;
}
				171
/**
 * UTF8ToUTF16:
 * @out:  a pointer to an array of shorts to store the result
 * @outlen:  the length of @out (number of shorts)
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. Code points above U+FFFF are written as a
 * surrogate pair.
 * TODO: UTF8ToUTF16 needs a fallback mechanism ...
 *
 * Returns the number of shorts written, or -1 by lack of space, or -2
 *     if the transcoding failed (malformed or truncated UTF-8 sequence,
 *     or a value that cannot be represented in UTF-16).
 */
int
UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
{
    unsigned short* outstart = out;
    unsigned short* outend = out + outlen;
    unsigned char* inend = in + inlen;
    unsigned int c, d, trailing;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) return -2;    /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else return -2;    /* no chance for this in UTF-16 */

        for ( ; trailing; trailing--) {
            /* A missing or non-continuation byte is a transcoding error. */
            if ((in >= inend) || (((d = *in++) & 0xC0) != 0x80)) return -2;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (out >= outend) return -1;
            *out++ = c;
        }
        else if (c < 0x110000) {
            /* a surrogate pair needs two free slots */
            if (out + 1 >= outend) return -1;
            c -= 0x10000;
            *out++ = 0xD800 | (c >> 10);
            *out++ = 0xDC00 | (c & 0x03FF);
        }
        else return -2;    /* beyond the range UTF-16 can encode */
    }
    return out - outstart;
}
				224
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	225	#endif /* ! HAVE_UNICODE_H */
Daniel Veillard	97b5877	1998-10-20 06:14:16 +0000	[diff] [blame]	226
Daniel Veillard	27d8874	1999-05-29 11:51:49 +0000	[diff] [blame]	227	/**
				228	* xmlDetectCharEncoding:
				229	* @in: a pointer to the first bytes of the XML entity, must be at least
				230	* 4 bytes long.
				231	*
				232	* Guess the encoding of the entity using the first bytes of the entity content
				233	* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
				234	*
				235	* Returns one of the XML_CHAR_ENCODING_... values.
				236	*/
				237	xmlCharEncoding
Daniel Veillard	011b63c	1999-06-02 17:44:04 +0000	[diff] [blame]	238	xmlDetectCharEncoding(const unsigned char* in)
Daniel Veillard	27d8874	1999-05-29 11:51:49 +0000	[diff] [blame]	239	{
				240	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				241	(in[2] == 0x00) && (in[3] == 0x3C))
				242	return(XML_CHAR_ENCODING_UCS4BE);
				243	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
				244	(in[2] == 0x00) && (in[3] == 0x00))
				245	return(XML_CHAR_ENCODING_UCS4LE);
				246	if ((in[0] == 0x00) && (in[1] == 0x00) &&
				247	(in[2] == 0x3C) && (in[3] == 0x00))
				248	return(XML_CHAR_ENCODING_UCS4_2143);
				249	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
				250	(in[2] == 0x00) && (in[3] == 0x00))
				251	return(XML_CHAR_ENCODING_UCS4_3412);
				252	if ((in[0] == 0xFE) && (in[1] == 0xFF))
				253	return(XML_CHAR_ENCODING_UTF16BE);
				254	if ((in[0] == 0xFF) && (in[1] == 0xFE))
				255	return(XML_CHAR_ENCODING_UTF16LE);
				256	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
				257	(in[2] == 0xA7) && (in[3] == 0x94))
				258	return(XML_CHAR_ENCODING_EBCDIC);
				259	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
				260	(in[2] == 0x78) && (in[3] == 0x6D))
				261	return(XML_CHAR_ENCODING_UTF8);
				262	return(XML_CHAR_ENCODING_NONE);
				263	}
				264
				265	/**
				266	* xmlParseCharEncoding:
				267	* @name: the encoding name as parsed, in UTF-8 format (ASCCI actually)
				268	*
				269	* Conpare the string to the known encoding schemes already known. Note
				270	* that the comparison is case insensitive accordingly to the section
				271	* [XML] 4.3.3 Character Encoding in Entities.
				272	*
				273	* Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
				274	* if not recognized.
				275	*/
				276	xmlCharEncoding
Daniel Veillard	011b63c	1999-06-02 17:44:04 +0000	[diff] [blame]	277	xmlParseCharEncoding(const char* name)
Daniel Veillard	27d8874	1999-05-29 11:51:49 +0000	[diff] [blame]	278	{
				279	char upper[500];
				280	int i;
				281
				282	for (i = 0;i < 499;i++) {
				283	upper[i] = toupper(name[i]);
				284	if (upper[i] == 0) break;
				285	}
				286	upper[i] = 0;
				287
				288	if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
				289	if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
				290	if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
				291
				292	/*
				293	* NOTE: if we were able to parse this, the endianness of UTF16 is
				294	* already found and in use
				295	*/
				296	if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
				297	if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
				298
				299	if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				300	if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
				301	if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
				302
				303	/*
				304	* NOTE: if we were able to parse this, the endianness of UCS4 is
				305	* already found and in use
				306	*/
				307	if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				308	if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
				309	if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
				310
				311
				312	if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
				313	if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
				314	if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
				315
				316	if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
				317	if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
				318	if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
				319
				320	if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
				321	if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
				322	if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
				323	if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
				324	if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
				325	if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
				326	if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
				327
				328	if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
				329	if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
				330	if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
				331	return(XML_CHAR_ENCODING_ERROR);
				332	}
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	333
				334	/****************************************************************
				335	* *
				336	* Char encoding handlers *
				337	* *
				338	****************************************************************/
				339
				340	/* the size should be growable, but it's not a big deal ... */
				341	#define MAX_ENCODING_HANDLERS 50
				342	static xmlCharEncodingHandlerPtr *handlers = NULL;
				343	static int nbCharEncodingHandler = 0;
				344
				345	/*
				346	* The default is UTF-8 for XML, that's also the default used for the
				347	* parser internals, so the default encoding handler is NULL
				348	*/
				349
				350	static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
				351
				352	/**
				353	* xmlNewCharEncodingHandler:
				354	* @name: the encoding name, in UTF-8 format (ASCCI actually)
				355	* @input: the xmlCharEncodingInputFunc to read that encoding
				356	* @output: the xmlCharEncodingOutputFunc to write that encoding
				357	*
				358	* Create and registers an xmlCharEncodingHandler.
				359	* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
				360	*/
				361	xmlCharEncodingHandlerPtr
				362	xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
				363	xmlCharEncodingOutputFunc output) {
				364	xmlCharEncodingHandlerPtr handler;
				365	char upper[500];
				366	int i;
				367	char *up = 0;
				368
				369	/*
				370	* Keep only the uppercase version of the encoding.
				371	*/
				372	if (name == NULL) {
				373	fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n");
				374	return(NULL);
				375	}
				376	for (i = 0;i < 499;i++) {
				377	upper[i] = toupper(name[i]);
				378	if (upper[i] == 0) break;
				379	}
				380	upper[i] = 0;
Daniel Veillard	6454aec	1999-09-02 22:04:43 +0000	[diff] [blame]	381	up = xmlMemStrdup(upper);
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	382	if (up == NULL) {
				383	fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
				384	return(NULL);
				385	}
				386
				387	/*
				388	* allocate and fill-up an handler block.
				389	*/
				390	handler = (xmlCharEncodingHandlerPtr)
Daniel Veillard	6454aec	1999-09-02 22:04:43 +0000	[diff] [blame]	391	xmlMalloc(sizeof(xmlCharEncodingHandler));
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	392	if (handler == NULL) {
				393	fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
				394	return(NULL);
				395	}
				396	handler->input = input;
				397	handler->output = output;
				398	handler->name = up;
				399
				400	/*
				401	* registers and returns the handler.
				402	*/
				403	xmlRegisterCharEncodingHandler(handler);
				404	return(handler);
				405	}
				406
				407	/**
				408	* xmlInitCharEncodingHandlers:
				409	*
				410	* Initialize the char encoding support, it registers the default
				411	* encoding supported.
				412	* NOTE: while public theis function usually don't need to be called
				413	* in normal processing.
				414	*/
				415	void
				416	xmlInitCharEncodingHandlers(void) {
				417	if (handlers != NULL) return;
				418
				419	handlers = (xmlCharEncodingHandlerPtr *)
Daniel Veillard	6454aec	1999-09-02 22:04:43 +0000	[diff] [blame]	420	xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	421
				422	if (handlers == NULL) {
				423	fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
				424	return;
				425	}
				426	xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	427	#ifdef HAVE_UNICODE_H
				428	#else
Daniel Veillard	b96e643	1999-08-29 21:02:19 +0000	[diff] [blame]	429	/* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	430	xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
Daniel Veillard	b05deb7	1999-08-10 19:04:08 +0000	[diff] [blame]	431	#endif
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	432	}
				433
				434	/**
				435	* xmlRegisterCharEncodingHandler:
				436	* @handler: the xmlCharEncodingHandlerPtr handler block
				437	*
				438	* Register the char encoding handler, surprizing, isn't it ?
				439	*/
				440	void
				441	xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
				442	if (handlers == NULL) xmlInitCharEncodingHandlers();
				443	if (handler == NULL) {
				444	fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n");
				445	return;
				446	}
				447
				448	if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
				449	fprintf(stderr,
				450	"xmlRegisterCharEncodingHandler: Too many handler registered\n");
				451	fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
				452	return;
				453	}
				454	handlers[nbCharEncodingHandler++] = handler;
				455	}
				456
				457	/**
				458	* xmlGetCharEncodingHandler:
				459	* @enc: an xmlCharEncoding value.
				460	*
				461	* Search in the registrered set the handler able to read/write that encoding.
				462	*
				463	* Returns the handler or NULL if not found
				464	*/
				465	xmlCharEncodingHandlerPtr
				466	xmlGetCharEncodingHandler(xmlCharEncoding enc) {
				467	if (handlers == NULL) xmlInitCharEncodingHandlers();
Daniel Veillard	b96e643	1999-08-29 21:02:19 +0000	[diff] [blame]	468	/* TODO xmlGetCharEncodingHandler !!!!!!! */
Daniel Veillard	14fff06	1999-06-22 21:49:07 +0000	[diff] [blame]	469	return(NULL);
				470	}
				471
				472	/**
				473	* xmlGetCharEncodingHandler:
				474	* @enc: a string describing the char encoding.
				475	*
				476	* Search in the registrered set the handler able to read/write that encoding.
				477	*
				478	* Returns the handler or NULL if not found
				479	*/
				480	xmlCharEncodingHandlerPtr
				481	xmlFindCharEncodingHandler(const char *name) {
				482	char upper[500];
				483	int i;
				484
				485	if (handlers == NULL) xmlInitCharEncodingHandlers();
				486	if (name == NULL) return(xmlDefaultCharEncodingHandler);
				487	if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
				488
				489	for (i = 0;i < 499;i++) {
				490	upper[i] = toupper(name[i]);
				491	if (upper[i] == 0) break;
				492	}
				493	upper[i] = 0;
				494
				495	for (i = 0;i < nbCharEncodingHandler; i++)
				496	if (!strcmp(name, handlers[i]->name))
				497	return(handlers[i]);
				498
				499	return(NULL);
				500	}
				501