Blame - fs/udf/unicode.c - kernel/msm-4.9

blob: 5a80efd8debc9b7357668743a45411618c6c4d64 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* unicode.c
				3	*
				4	* PURPOSE
				5	* Routines for converting between UTF-8 and OSTA Compressed Unicode.
				6	* Also handles filename mangling
				7	*
				8	* DESCRIPTION
				9	* OSTA Compressed Unicode is explained in the OSTA UDF specification.
				10	* http://www.osta.org/
				11	* UTF-8 is explained in IETF RFC 3629.
				12	* https://www.rfc-editor.org/rfc/rfc3629
				13	*
				14	* CONTACTS
				15	* E-mail regarding any portion of the Linux UDF file system should be
				16	* directed to the development team's mailing list (run by majordomo):
				17	* linux_udf@hpesjro.fc.hp.com
				18	*
				19	* COPYRIGHT
				20	* This file is distributed under the terms of the GNU General Public
				21	* License (GPL). Copies of the GPL can be obtained from:
				22	* ftp://prep.ai.mit.edu/pub/gnu/GPL
				23	* Each contributing author retains all rights to their own work.
				24	*/
				25
				26	#include "udfdecl.h"
				27
				28	#include <linux/kernel.h>
				29	#include <linux/string.h> /* for memset */
				30	#include <linux/nls.h>
				31	#include <linux/udf_fs.h>
				32
				33	#include "udf_sb.h"
				34
				35	static int udf_translate_to_linux(uint8_t , uint8_t , int, uint8_t *, int);
				36
				37	static int udf_char_to_ustr(struct ustr dest, const uint8_t src, int strlen)
				38	{
				39	if ( (!dest) \|\| (!src) \|\| (!strlen) \|\| (strlen > UDF_NAME_LEN-2) )
				40	return 0;
				41	memset(dest, 0, sizeof(struct ustr));
				42	memcpy(dest->u_name, src, strlen);
				43	dest->u_cmpID = 0x08;
				44	dest->u_len = strlen;
				45	return strlen;
				46	}
				47
				48	/*
				49	* udf_build_ustr
				50	*/
				51	int udf_build_ustr(struct ustr dest, dstring ptr, int size)
				52	{
				53	int usesize;
				54
				55	if ( (!dest) \|\| (!ptr) \|\| (!size) )
				56	return -1;
				57
				58	memset(dest, 0, sizeof(struct ustr));
				59	usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
				60	dest->u_cmpID=ptr[0];
				61	dest->u_len=ptr[size-1];
				62	memcpy(dest->u_name, ptr+1, usesize-1);
				63	return 0;
				64	}
				65
				66	/*
				67	* udf_build_ustr_exact
				68	*/
				69	static int udf_build_ustr_exact(struct ustr dest, dstring ptr, int exactsize)
				70	{
				71	if ( (!dest) \|\| (!ptr) \|\| (!exactsize) )
				72	return -1;
				73
				74	memset(dest, 0, sizeof(struct ustr));
				75	dest->u_cmpID=ptr[0];
				76	dest->u_len=exactsize-1;
				77	memcpy(dest->u_name, ptr+1, exactsize-1);
				78	return 0;
				79	}
				80
				81	/*
				82	* udf_ocu_to_utf8
				83	*
				84	* PURPOSE
				85	* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
				86	*
				87	* DESCRIPTION
				88	* This routine is only called by udf_filldir().
				89	*
				90	* PRE-CONDITIONS
				91	* utf Pointer to UTF-8 output buffer.
				92	* ocu Pointer to OSTA Compressed Unicode input buffer
				93	* of size UDF_NAME_LEN bytes.
				94	* both of type "struct ustr *"
				95	*
				96	* POST-CONDITIONS
				97	* <return> Zero on success.
				98	*
				99	* HISTORY
				100	* November 12, 1997 - Andrew E. Mileski
				101	* Written, tested, and released.
				102	*/
				103	int udf_CS0toUTF8(struct ustr utf_o, struct ustr ocu_i)
				104	{
				105	uint8_t *ocu;
				106	uint32_t c;
				107	uint8_t cmp_id, ocu_len;
				108	int i;
				109
				110	ocu = ocu_i->u_name;
				111
				112	ocu_len = ocu_i->u_len;
				113	cmp_id = ocu_i->u_cmpID;
				114	utf_o->u_len = 0;
				115
				116	if (ocu_len == 0)
				117	{
				118	memset(utf_o, 0, sizeof(struct ustr));
				119	utf_o->u_cmpID = 0;
				120	utf_o->u_len = 0;
				121	return 0;
				122	}
				123
				124	if ((cmp_id != 8) && (cmp_id != 16))
				125	{
				126	printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
				127	return 0;
				128	}
				129
				130	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
				131	{
				132
				133	/* Expand OSTA compressed Unicode to Unicode */
				134	c = ocu[i++];
				135	if (cmp_id == 16)
				136	c = (c << 8) \| ocu[i++];
				137
				138	/* Compress Unicode to UTF-8 */
				139	if (c < 0x80U)
				140	utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
				141	else if (c < 0x800U)
				142	{
				143	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 \| (c >> 6));
				144	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 \| (c & 0x3f));
				145	}
				146	else
				147	{
				148	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 \| (c >> 12));
				149	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 \| ((c >> 6) & 0x3f));
				150	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 \| (c & 0x3f));
				151	}
				152	}
				153	utf_o->u_cmpID=8;
				154
				155	return utf_o->u_len;
				156	}
				157
				158	/*
				159	*
				160	* udf_utf8_to_ocu
				161	*
				162	* PURPOSE
				163	* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
				164	*
				165	* DESCRIPTION
				166	* This routine is only called by udf_lookup().
				167	*
				168	* PRE-CONDITIONS
				169	* ocu Pointer to OSTA Compressed Unicode output
				170	* buffer of size UDF_NAME_LEN bytes.
				171	* utf Pointer to UTF-8 input buffer.
				172	* utf_len Length of UTF-8 input buffer in bytes.
				173	*
				174	* POST-CONDITIONS
				175	* <return> Zero on success.
				176	*
				177	* HISTORY
				178	* November 12, 1997 - Andrew E. Mileski
				179	* Written, tested, and released.
				180	*/
				181	static int udf_UTF8toCS0(dstring ocu, struct ustr utf, int length)
				182	{
				183	unsigned c, i, max_val, utf_char;
				184	int utf_cnt, u_len;
				185
				186	memset(ocu, 0, sizeof(dstring) * length);
				187	ocu[0] = 8;
				188	max_val = 0xffU;
				189
				190	try_again:
				191	u_len = 0U;
				192	utf_char = 0U;
				193	utf_cnt = 0U;
				194	for (i = 0U; i < utf->u_len; i++)
				195	{
				196	c = (uint8_t)utf->u_name[i];
				197
				198	/* Complete a multi-byte UTF-8 character */
				199	if (utf_cnt)
				200	{
				201	utf_char = (utf_char << 6) \| (c & 0x3fU);
				202	if (--utf_cnt)
				203	continue;
				204	}
				205	else
				206	{
				207	/* Check for a multi-byte UTF-8 character */
				208	if (c & 0x80U)
				209	{
				210	/* Start a multi-byte UTF-8 character */
				211	if ((c & 0xe0U) == 0xc0U)
				212	{
				213	utf_char = c & 0x1fU;
				214	utf_cnt = 1;
				215	}
				216	else if ((c & 0xf0U) == 0xe0U)
				217	{
				218	utf_char = c & 0x0fU;
				219	utf_cnt = 2;
				220	}
				221	else if ((c & 0xf8U) == 0xf0U)
				222	{
				223	utf_char = c & 0x07U;
				224	utf_cnt = 3;
				225	}
				226	else if ((c & 0xfcU) == 0xf8U)
				227	{
				228	utf_char = c & 0x03U;
				229	utf_cnt = 4;
				230	}
				231	else if ((c & 0xfeU) == 0xfcU)
				232	{
				233	utf_char = c & 0x01U;
				234	utf_cnt = 5;
				235	}
				236	else
				237	goto error_out;
				238	continue;
				239	} else
				240	/* Single byte UTF-8 character (most common) */
				241	utf_char = c;
				242	}
				243
				244	/* Choose no compression if necessary */
				245	if (utf_char > max_val)
				246	{
				247	if ( 0xffU == max_val )
				248	{
				249	max_val = 0xffffU;
				250	ocu[0] = (uint8_t)0x10U;
				251	goto try_again;
				252	}
				253	goto error_out;
				254	}
				255
				256	if (max_val == 0xffffU)
				257	{
				258	ocu[++u_len] = (uint8_t)(utf_char >> 8);
				259	}
				260	ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
				261	}
				262
				263
				264	if (utf_cnt)
				265	{
				266	error_out:
				267	ocu[++u_len] = '?';
				268	printk(KERN_DEBUG "udf: bad UTF-8 character\n");
				269	}
				270
				271	ocu[length - 1] = (uint8_t)u_len + 1;
				272	return u_len + 1;
				273	}
				274
				275	static int udf_CS0toNLS(struct nls_table nls, struct ustr utf_o, struct ustr *ocu_i)
				276	{
				277	uint8_t *ocu;
				278	uint32_t c;
				279	uint8_t cmp_id, ocu_len;
				280	int i;
				281
				282	ocu = ocu_i->u_name;
				283
				284	ocu_len = ocu_i->u_len;
				285	cmp_id = ocu_i->u_cmpID;
				286	utf_o->u_len = 0;
				287
				288	if (ocu_len == 0)
				289	{
				290	memset(utf_o, 0, sizeof(struct ustr));
				291	utf_o->u_cmpID = 0;
				292	utf_o->u_len = 0;
				293	return 0;
				294	}
				295
				296	if ((cmp_id != 8) && (cmp_id != 16))
				297	{
				298	printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
				299	return 0;
				300	}
				301
				302	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
				303	{
				304	/* Expand OSTA compressed Unicode to Unicode */
				305	c = ocu[i++];
				306	if (cmp_id == 16)
				307	c = (c << 8) \| ocu[i++];
				308
				309	utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
				310	UDF_NAME_LEN - utf_o->u_len);
				311	}
				312	utf_o->u_cmpID=8;
				313
				314	return utf_o->u_len;
				315	}
				316
				317	static int udf_NLStoCS0(struct nls_table nls, dstring ocu, struct ustr *uni, int length)
				318	{
				319	unsigned len, i, max_val;
				320	uint16_t uni_char;
				321	int u_len;
				322
				323	memset(ocu, 0, sizeof(dstring) * length);
				324	ocu[0] = 8;
				325	max_val = 0xffU;
				326
				327	try_again:
				328	u_len = 0U;
				329	for (i = 0U; i < uni->u_len; i++)
				330	{
				331	len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char);
				332	if (len <= 0)
				333	continue;
				334
				335	if (uni_char > max_val)
				336	{
				337	max_val = 0xffffU;
				338	ocu[0] = (uint8_t)0x10U;
				339	goto try_again;
				340	}
				341
				342	if (max_val == 0xffffU)
				343	ocu[++u_len] = (uint8_t)(uni_char >> 8);
				344	ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
				345	i += len - 1;
				346	}
				347
				348	ocu[length - 1] = (uint8_t)u_len + 1;
				349	return u_len + 1;
				350	}
				351
				352	int udf_get_filename(struct super_block sb, uint8_t sname, uint8_t *dname, int flen)
				353	{
				354	struct ustr filename, unifilename;
				355	int len;
				356
				357	if (udf_build_ustr_exact(&unifilename, sname, flen))
				358	{
				359	return 0;
				360	}
				361
				362	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
				363	{
				364	if (!udf_CS0toUTF8(&filename, &unifilename) )
				365	{
				366	udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
				367	return 0;
				368	}
				369	}
				370	else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
				371	{
				372	if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) )
				373	{
				374	udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
				375	return 0;
				376	}
				377	}
				378	else
				379	return 0;
				380
				381	if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
				382	unifilename.u_name, unifilename.u_len)))
				383	{
				384	return len;
				385	}
				386	return 0;
				387	}
				388
				389	int udf_put_filename(struct super_block sb, const uint8_t sname, uint8_t *dname, int flen)
				390	{
				391	struct ustr unifilename;
				392	int namelen;
				393
				394	if ( !(udf_char_to_ustr(&unifilename, sname, flen)) )
				395	{
				396	return 0;
				397	}
				398
				399	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
				400	{
				401	if ( !(namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN)) )
				402	{
				403	return 0;
				404	}
				405	}
				406	else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
				407	{
				408	if ( !(namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &unifilename, UDF_NAME_LEN)) )
				409	{
				410	return 0;
				411	}
				412	}
				413	else
				414	return 0;
				415
				416	return namelen;
				417	}
				418
				419	#define ILLEGAL_CHAR_MARK '_'
				420	#define EXT_MARK '.'
				421	#define CRC_MARK '#'
				422	#define EXT_SIZE 5
				423
				424	static int udf_translate_to_linux(uint8_t newName, uint8_t udfName, int udfLen, uint8_t *fidName, int fidNameLen)
				425	{
				426	int index, newIndex = 0, needsCRC = 0;
				427	int extIndex = 0, newExtIndex = 0, hasExt = 0;
				428	unsigned short valueCRC;
				429	uint8_t curr;
				430	const uint8_t hexChar[] = "0123456789ABCDEF";
				431
				432	if (udfName[0] == '.' && (udfLen == 1 \|\|
				433	(udfLen == 2 && udfName[1] == '.')))
				434	{
				435	needsCRC = 1;
				436	newIndex = udfLen;
				437	memcpy(newName, udfName, udfLen);
				438	}
				439	else
				440	{
				441	for (index = 0; index < udfLen; index++)
				442	{
				443	curr = udfName[index];
				444	if (curr == '/' \|\| curr == 0)
				445	{
				446	needsCRC = 1;
				447	curr = ILLEGAL_CHAR_MARK;
				448	while (index+1 < udfLen && (udfName[index+1] == '/' \|\|
				449	udfName[index+1] == 0))
				450	index++;
				451	}
				452	if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE)
				453	{
				454	if (udfLen == index + 1)
				455	hasExt = 0;
				456	else
				457	{
				458	hasExt = 1;
				459	extIndex = index;
				460	newExtIndex = newIndex;
				461	}
				462	}
				463	if (newIndex < 256)
				464	newName[newIndex++] = curr;
				465	else
				466	needsCRC = 1;
				467	}
				468	}
				469	if (needsCRC)
				470	{
				471	uint8_t ext[EXT_SIZE];
				472	int localExtIndex = 0;
				473
				474	if (hasExt)
				475	{
				476	int maxFilenameLen;
				477	for(index = 0; index<EXT_SIZE && extIndex + index +1 < udfLen;
				478	index++ )
				479	{
				480	curr = udfName[extIndex + index + 1];
				481
				482	if (curr == '/' \|\| curr == 0)
				483	{
				484	needsCRC = 1;
				485	curr = ILLEGAL_CHAR_MARK;
				486	while(extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE
				487	&& (udfName[extIndex + index + 2] == '/' \|\|
				488	udfName[extIndex + index + 2] == 0)))
				489	index++;
				490	}
				491	ext[localExtIndex++] = curr;
				492	}
				493	maxFilenameLen = 250 - localExtIndex;
				494	if (newIndex > maxFilenameLen)
				495	newIndex = maxFilenameLen;
				496	else
				497	newIndex = newExtIndex;
				498	}
				499	else if (newIndex > 250)
				500	newIndex = 250;
				501	newName[newIndex++] = CRC_MARK;
				502	valueCRC = udf_crc(fidName, fidNameLen, 0);
				503	newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
				504	newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
				505	newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
				506	newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
				507
				508	if (hasExt)
				509	{
				510	newName[newIndex++] = EXT_MARK;
				511	for (index = 0;index < localExtIndex ;index++ )
				512	newName[newIndex++] = ext[index];
				513	}
				514	}
				515	return newIndex;
				516	}