Blame - media/libdrm/mobile1/src/objmng/drm_i18n.c - platform/frameworks/base

blob: b1118a9843baef20dee1b53f05f6dc5e182b9676 [file] [log] [blame]

The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2007 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#include <objmng/drm_i18n.h>
				18
				19	#define IS_GB2312_HIGH_BYTE(c) ((c) >= 0xA1 && (c) <= 0xF7)
				20	#define IS_GB2312_LOW_BYTE(c) ((c) >= 0xA1 && (c) <= 0xFE)
				21	#define IS_GBK_HIGH_BYTE(c) ((c) >= 0x81 && (c) <= 0xFE)
				22	#define IS_GBK_LOW_BYTE(c) ((c) >= 0x40 && (c) <= 0xFE && (c) != 0x7F)
				23	#define IS_BIG5_HIGH_BYTE(c) ((c) >= 0xA1 && (c) <= 0xF9)
				24	#define IS_BIG5_LOW_BYTE(c) (((c) >= 0x40 && (c) <= 0x7E) \
				25	\|\| ((c) >= 0xA1 && (c) <= 0xFE))
				26	#define IS_ASCII(c) ((c) <= 127)
				27
				28	#define INVALID_UNICODE 0xFFFD
				29
				30	#define I18N_LATIN1_SUPPORT
				31	#define I18N_UTF8_UTF16_SUPPORT
				32
				33
				34	/**
				35	* Simply convert ISO 8859-1 (latin1) to unicode
				36	*/
				37	static int32_t latin1ToWcs(const uint8_t *mbs, int32_t mbsLen,
				38	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				39	int32_t *bytesConsumed);
				40
				41	/**
				42	* Convert one unicode char to ISO 8859-1 (latin1) byte
				43	*/
				44	static int32_t wcToLatin1(uint16_t wc, uint8_t * mbs, int32_t bufSize);
				45
				46	/**
				47	* Convert UTF-8 to unicode
				48	*/
				49	static int32_t utf8ToWcs(const uint8_t *mbs, int32_t mbsLen,
				50	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				51	int32_t *bytesConsumed);
				52
				53	/**
				54	* Convert one unicode char to UTF-8 bytes
				55	*/
				56	static int32_t wcToUtf8(uint16_t wc, uint8_t * mbs, int32_t bufSize);
				57
				58	/**
				59	* Convert UTF-16 BE to unicode
				60	*/
				61	static int32_t utf16beToWcs(const uint8_t *mbs, int32_t mbsLen,
				62	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				63	int32_t *bytesConsumed);
				64
				65	/**
				66	* Convert one unicode char to UTF-16 BE bytes
				67	*/
				68	static int32_t wcToUtf16be(uint16_t wc, uint8_t * mbs, int32_t bufSize);
				69
				70	/**
				71	* Convert UTF-16 LE to unicode
				72	*/
				73	static int32_t utf16leToWcs(const uint8_t *mbs, int32_t mbsLen,
				74	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				75	int32_t *bytesConsumed);
				76
				77	/**
				78	* Convert one unicode char to UTF-16 LE bytes
				79	*/
				80	static int32_t wcToUtf16le(uint16_t wc, uint8_t * mbs, int32_t bufSize);
				81
				82	/*
				83	* see drm_i18n.h
				84	*/
				85	int32_t DRM_i18n_mbsToWcs(DRM_Charset_t charset,
				86	const uint8_t *mbs, int32_t mbsLen,
				87	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				88	int32_t *bytesConsumed)
				89	{
				90	switch (charset)
				91	{
				92	#ifdef I18N_GB2312_SUPPORT
				93	case DRM_CHARSET_GB2312:
				94	return gb2312ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				95	#endif
				96	#ifdef I18N_GBK_SUPPORT
				97	case DRM_CHARSET_GBK:
				98	return gbkToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				99	#endif
				100	#ifdef I18N_BIG5_SUPPORT
				101	case DRM_CHARSET_BIG5:
				102	return big5ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				103	#endif
				104	#ifdef I18N_LATIN1_SUPPORT
				105	case DRM_CHARSET_LATIN1:
				106	return latin1ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				107	#endif
				108	#ifdef I18N_ISO8859X_SUPPORT
				109	case DRM_CHARSET_LATIN2:
				110	case DRM_CHARSET_LATIN3:
				111	case DRM_CHARSET_LATIN4:
				112	case DRM_CHARSET_CYRILLIC:
				113	case DRM_CHARSET_ARABIC:
				114	case DRM_CHARSET_GREEK:
				115	case DRM_CHARSET_HEBREW:
				116	case DRM_CHARSET_LATIN5:
				117	case DRM_CHARSET_LATIN6:
				118	case DRM_CHARSET_THAI:
				119	case DRM_CHARSET_LATIN7:
				120	case DRM_CHARSET_LATIN8:
				121	case DRM_CHARSET_LATIN9:
				122	case DRM_CHARSET_LATIN10:
				123	return iso8859xToWcs(charset, mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				124	#endif
				125	#ifdef I18N_UTF8_UTF16_SUPPORT
				126	case DRM_CHARSET_UTF8:
				127	return utf8ToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				128	case DRM_CHARSET_UTF16BE:
				129	return utf16beToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				130	case DRM_CHARSET_UTF16LE:
				131	return utf16leToWcs(mbs, mbsLen, wcsBuf, bufSizeInWideChar, bytesConsumed);
				132	#endif
				133	default:
				134	return -1;
				135	}
				136	}
				137
				138	/*
				139	* see drm_i18n.h
				140	*/
				141	int32_t DRM_i18n_wcsToMbs(DRM_Charset_t charset,
				142	const uint16_t *wcs, int32_t wcsLen,
				143	uint8_t *mbsBuf, int32_t bufSizeInByte)
				144	{
				145	int32_t (* wcToMbFunc)(uint16_t, uint8_t *, int32_t);
				146	int32_t charIndex = 0;
				147	int32_t numMultiBytes = 0;
				148
				149	switch (charset)
				150	{
				151	#ifdef I18N_LATIN1_SUPPORT
				152	case DRM_CHARSET_LATIN1:
				153	wcToMbFunc = wcToLatin1;
				154	break;
				155	#endif
				156	#ifdef I18N_UTF8_UTF16_SUPPORT
				157	case DRM_CHARSET_UTF8:
				158	wcToMbFunc = wcToUtf8;
				159	break;
				160	case DRM_CHARSET_UTF16BE:
				161	wcToMbFunc = wcToUtf16be;
				162	break;
				163	case DRM_CHARSET_UTF16LE:
				164	wcToMbFunc = wcToUtf16le;
				165	break;
				166	#endif
				167	#ifdef I18N_ISO8859X_SUPPORT
				168	case DRM_CHARSET_LATIN2:
				169	case DRM_CHARSET_LATIN3:
				170	case DRM_CHARSET_LATIN4:
				171	case DRM_CHARSET_CYRILLIC:
				172	case DRM_CHARSET_ARABIC:
				173	case DRM_CHARSET_GREEK:
				174	case DRM_CHARSET_HEBREW:
				175	case DRM_CHARSET_LATIN5:
				176	case DRM_CHARSET_LATIN6:
				177	case DRM_CHARSET_THAI:
				178	case DRM_CHARSET_LATIN7:
				179	case DRM_CHARSET_LATIN8:
				180	case DRM_CHARSET_LATIN9:
				181	case DRM_CHARSET_LATIN10:
				182	return wcsToIso8859x(charset, wcs, wcsLen, mbsBuf, bufSizeInByte);
				183	#endif
				184	default:
				185	return -1;
				186	}
				187
				188	if (mbsBuf) {
				189	while (numMultiBytes < bufSizeInByte && charIndex < wcsLen) {
				190	/* TODO: handle surrogate pair values here */
				191	int32_t mbLen = wcToMbFunc(wcs[charIndex],
				192	&mbsBuf[numMultiBytes], bufSizeInByte - numMultiBytes);
				193
				194	if (numMultiBytes + mbLen > bufSizeInByte) {
				195	/* Insufficient buffer. Don't update numMultiBytes */
				196	break;
				197	}
				198	charIndex++;
				199	numMultiBytes += mbLen;
				200	}
				201	} else {
				202	while (charIndex < wcsLen) {
				203	/* TODO: handle surrogate pair values here */
				204	numMultiBytes += wcToMbFunc(wcs[charIndex], NULL, 0);
				205	charIndex++;
				206	}
				207	}
				208
				209	return numMultiBytes;
				210	}
				211
				212
				213	#ifdef I18N_LATIN1_SUPPORT
				214
				215	int32_t latin1ToWcs(const uint8_t *mbs, int32_t mbsLen,
				216	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				217	int32_t *bytesConsumed)
				218	{
				219	int32_t charsToConvert;
				220	int32_t len;
				221
				222	if (wcsBuf == NULL) {
				223	return mbsLen;
				224	}
				225
				226	len = charsToConvert = mbsLen > bufSizeInWideChar ? bufSizeInWideChar : mbsLen;
				227	if (len < 0)
				228	return 0;
				229	while (len--) {
				230	wcsBuf++ = mbs++;
				231	}
				232
				233	if (bytesConsumed)
				234	*bytesConsumed = charsToConvert;
				235
				236	return charsToConvert;
				237	}
				238
				239	int32_t wcToLatin1(uint16_t wc, uint8_t * mbs, int32_t bufSize)
				240	{
				241	uint8_t ch;
				242
				243	if (wc < 0x100) {
				244	ch = (uint8_t)(wc & 0xff);
				245	} else {
				246	ch = '?';
				247	}
				248	if (mbs && bufSize > 0)
				249	*mbs = ch;
				250	return 1;
				251	}
				252
				253	#endif /* I18N_LATIN1_SUPPORT */
				254
				255	#ifdef I18N_UTF8_UTF16_SUPPORT
				256
				257	int32_t utf8ToWcs(const uint8_t *mbs, int32_t mbsLen,
				258	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				259	int32_t *bytesConsumed)
				260	{
				261	int32_t charsConverted = 0;
				262	int32_t i = 0;
				263	int32_t wideChar;
				264
				265	if (wcsBuf == NULL) {
				266	/* No conversion but we're still going to calculate bytesConsumed */
				267	bufSizeInWideChar = mbsLen * 2;
				268	}
				269
				270	while((i < mbsLen) && (charsConverted < bufSizeInWideChar)) {
				271	uint8_t ch = mbs[i];
				272	uint8_t ch2, ch3, ch4;
				273
				274	wideChar = -1;
				275
				276	if(IS_ASCII(ch)) {
				277	wideChar = ch;
				278	i++;
				279	} else if ((ch & 0xc0) == 0xc0) {
				280	int utfStart = i;
				281	if ((ch & 0xe0) == 0xc0) {
				282	/* 2 byte sequence */
				283	if (i + 1 < mbsLen && ((ch2 = mbs[i + 1]) & 0xc0) == 0x80) {
				284	wideChar = (uint16_t)(((ch & 0x1F) << 6) \| (ch2 & 0x3F));
				285	i += 2;
				286	} else {
				287	/* skip incomplete sequence */
				288	i++;
				289	}
				290	} else if ((ch & 0xf0) == 0xe0) {
				291	/* 3 byte sequence */
				292	if (i + 2 < mbsLen
				293	&& ((ch2 = mbs[i + 1]) & 0xc0) == 0x80
				294	&& ((ch3 = mbs[i + 2]) & 0xc0) == 0x80) {
				295	wideChar = (uint16_t)(((ch & 0x0F) << 12) \| ((ch2 & 0x3F) << 6) \| (ch3 & 0x3F));
				296	i += 3;
				297	} else {
				298	/* skip incomplete sequence (up to 2 bytes) */
				299	i++;
				300	if (i < mbsLen && (mbs[i] & 0xc0) == 0x80)
				301	i++;
				302	}
				303	} else if ((ch & 0xf8) == 0xf0) {
				304	/* 4 byte sequence */
				305	if (i + 3 < mbsLen
				306	&& ((ch2 = mbs[i + 1]) & 0xc0) == 0x80
				307	&& ((ch3 = mbs[i + 2]) & 0xc0) == 0x80
				308	&& ((ch4 = mbs[i + 3]) & 0xc0) == 0x80) {
				309	/* FIXME: we do NOT support U+10000 - U+10FFFF for now.
				310	* leave it as 0xFFFD. */
				311	wideChar = INVALID_UNICODE;
				312	i += 4;
				313	} else {
				314	/* skip incomplete sequence (up to 3 bytes) */
				315	i++;
				316	if (i < mbsLen && (mbs[i] & 0xc0) == 0x80) {
				317	i++;
				318	if (i < mbsLen && (mbs[i] & 0xc0) == 0x80) {
				319	i++;
				320	}
				321	}
				322	}
				323	} else {
				324	/* invalid */
				325	i++;
				326	}
				327	if (i >= mbsLen && wideChar == -1) {
				328	/* Possible incomplete UTF-8 sequence at the end of mbs.
				329	* Leave it to the caller.
				330	*/
				331	i = utfStart;
				332	break;
				333	}
				334	} else {
				335	/* invalid */
				336	i++;
				337	}
				338	if(wcsBuf) {
				339	if (wideChar == -1)
				340	wideChar = INVALID_UNICODE;
				341	wcsBuf[charsConverted] = (uint16_t)wideChar;
				342	}
				343	charsConverted++;
				344	}
				345
				346	if (bytesConsumed)
				347	*bytesConsumed = i;
				348
				349	return charsConverted;
				350	}
				351
				352	int32_t wcToUtf8(uint16_t wc, uint8_t * mbs, int32_t bufSize)
				353	{
				354	if (wc <= 0x7f) {
				355	if (mbs && (bufSize >= 1)) {
				356	*mbs = (uint8_t)wc;
				357	}
				358	return 1;
				359	} else if (wc <= 0x7ff) {
				360	if (mbs && (bufSize >= 2)) {
				361	*mbs++ = (uint8_t)((wc >> 6) \| 0xc0);
				362	*mbs = (uint8_t)((wc & 0x3f) \| 0x80);
				363	}
				364	return 2;
				365	} else {
				366	if (mbs && (bufSize >= 3)) {
				367	*mbs++ = (uint8_t)((wc >> 12) \| 0xe0);
				368	*mbs++ = (uint8_t)(((wc >> 6) & 0x3f)\| 0x80);
				369	*mbs = (uint8_t)((wc & 0x3f) \| 0x80);
				370	}
				371	return 3;
				372	}
				373	}
				374
				375	int32_t utf16beToWcs(const uint8_t *mbs, int32_t mbsLen,
				376	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				377	int32_t *bytesConsumed)
				378	{
				379	int32_t charsToConvert;
				380	int32_t len;
				381
				382	if (wcsBuf == NULL) {
				383	return mbsLen / 2;
				384	}
				385
				386	len = charsToConvert = (mbsLen / 2) > bufSizeInWideChar ? bufSizeInWideChar : (mbsLen / 2);
				387	while (len--) {
				388	/* TODO: handle surrogate pair values */
				389	wcsBuf++ = (uint16_t)((mbs << 8) \| *(mbs + 1));
				390	mbs += 2;
				391	}
				392
				393	if (bytesConsumed)
				394	bytesConsumed = charsToConvert 2;
				395
				396	return charsToConvert;
				397	}
				398
				399	int32_t wcToUtf16be(uint16_t wc, uint8_t * mbs, int32_t bufSize)
				400	{
				401	if (mbs && bufSize >= 2) {
				402	/* TODO: handle surrogate pair values */
				403	*mbs = (uint8_t)(wc >> 8);
				404	*(mbs + 1) = (uint8_t)(wc & 0xff);
				405	}
				406	return 2;
				407	}
				408
				409	int32_t utf16leToWcs(const uint8_t *mbs, int32_t mbsLen,
				410	uint16_t *wcsBuf, int32_t bufSizeInWideChar,
				411	int32_t *bytesConsumed)
				412	{
				413	int32_t charsToConvert;
				414	int32_t len;
				415
				416	if (wcsBuf == NULL) {
				417	return mbsLen / 2;
				418	}
				419
				420	len = charsToConvert = (mbsLen / 2) > bufSizeInWideChar ? bufSizeInWideChar : (mbsLen / 2);
				421	while (len--) {
				422	/* TODO: handle surrogate pair values */
				423	wcsBuf++ = (uint16_t)(mbs \| (*(mbs + 1) << 8));
				424	mbs += 2;
				425	}
				426
				427	if (bytesConsumed)
				428	bytesConsumed = charsToConvert 2;
				429
				430	return charsToConvert;
				431	}
				432
				433	int32_t wcToUtf16le(uint16_t wc, uint8_t * mbs, int32_t bufSize)
				434	{
				435	if (mbs && bufSize >= 2) {
				436	/* TODO: handle surrogate pair values */
				437	*mbs = (uint8_t)(wc & 0xff);
				438	*(mbs + 1) = (uint8_t)(wc >> 8);
				439	}
				440	return 2;
				441	}
				442
				443	#endif /* I18N_UTF8_UTF16_SUPPORT */
				444