Blame - src/share/utf8.c - platform/external/flac

blob: f1e5708db047f10def372eb37589c01f5f8bd6a3 [file] [log] [blame]

Josh Coalson	fda98fb	2002-05-17 06:33:39 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com>
				3	* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
				4	*
				5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License as published by
				7	* the Free Software Foundation; either version 2 of the License, or
				8	* (at your option) any later version.
				9	*
				10	* This program is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	* GNU General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU General Public License
				16	* along with this program; if not, write to the Free Software
				17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				18	*/
				19
				20	/*
				21	* Convert a string between UTF-8 and the locale's charset.
				22	*/
				23
				24	#include <stdlib.h>
				25	#include <string.h>
				26
				27	#include "utf8.h"
Josh Coalson	f8e6b09	2002-08-14 00:51:30 +0000	[diff] [blame]	28	#include "charset.h"
Josh Coalson	fda98fb	2002-05-17 06:33:39 +0000	[diff] [blame]	29
				30
				31	#ifdef _WIN32
				32
				33	/* Thanks to Peter Harris <peter.harris@hummingbird.com> for this win32
				34	* code.
				35	*/
				36
				37	#include <stdio.h>
				38	#include <windows.h>
				39
				/*
				 * Encode a NUL-terminated string of 16-bit code units as UTF-8.
				 *
				 * Every unit is encoded on its own as a 1-, 2- or 3-byte sequence, so
				 * surrogate pairs are not merged into 4-byte sequences.
				 *
				 * Returns a malloc()ed, NUL-terminated buffer that the caller must
				 * free(), or NULL if the size computation wraps or malloc() fails.
				 */
				static unsigned char *make_utf8_string(const wchar_t *unicode)
				{
					size_t size = 0, index = 0, out_index = 0;
					unsigned char *out;
					unsigned short c;

					/* first calculate the size of the target string */
					while ((c = (unsigned short)unicode[index++]) != 0) {
						size_t n;

						if (c < 0x0080)
							n = 1;
						else if (c < 0x0800)
							n = 2;
						else
							n = 3;

						if (size + n < size) /* size_t wrapped around */
							return NULL;
						size += n;
					}
					if (size + 1 < size) /* no room left for the terminator */
						return NULL;

					out = malloc(size + 1);
					if (out == NULL)
						return NULL;

					/* second pass: emit the bytes */
					index = 0;
					while ((c = (unsigned short)unicode[index++]) != 0) {
						if (c < 0x0080) {
							out[out_index++] = (unsigned char)c;
						} else if (c < 0x0800) {
							out[out_index++] = (unsigned char)(0xc0 | (c >> 6));
							out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
						} else {
							out[out_index++] = (unsigned char)(0xe0 | (c >> 12));
							out[out_index++] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
							out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
						}
					}
					out[out_index] = 0x00;

					return out;
				}
				83
				/*
				 * Walk a NUL-terminated UTF-8 string and decode it into 16-bit units.
				 *
				 * When out is NULL the units are only counted; otherwise each decoded
				 * unit is also stored in out[].  Returns the number of units.
				 *
				 * Only 1-, 2- and 3-byte sequences are understood.  Continuation bytes
				 * are consumed only while they are non-zero, so a sequence cut short
				 * by the terminator never makes the scan run past the end of the string.
				 */
				static size_t decode_utf8_units(const unsigned char *utf8, wchar_t *out)
				{
					size_t index = 0, count = 0;
					unsigned char c;

					while ((c = utf8[index++]) != 0) {
						unsigned int unit;
						int trail;

						if ((c & 0x80) == 0) {
							unit = c;
							trail = 0;
						} else if ((c & 0xe0) == 0xe0) {
							unit = (c & 0x1Fu) << 12;
							trail = 2;
						} else {
							unit = (c & 0x3Fu) << 6;
							trail = 1;
						}

						/* fold in the continuation bytes, most significant first */
						while (trail > 0 && utf8[index] != 0) {
							trail--;
							unit |= (utf8[index++] & 0x3Fu) << (6 * trail);
						}

						if (out != NULL)
							out[count] = (wchar_t)unit;
						count++;
					}

					return count;
				}

				/*
				 * Decode a NUL-terminated UTF-8 string into a newly allocated,
				 * zero-terminated wchar_t string.
				 *
				 * Returns NULL if the allocation size would overflow or malloc()
				 * fails.  The caller must free() the result.
				 */
				static wchar_t *make_unicode_string(const unsigned char *utf8)
				{
					wchar_t *out;
					size_t size;

					/* first calculate the size of the target string */
					size = decode_utf8_units(utf8, NULL);
					if (size >= (size_t)-1 / sizeof *out)
						return NULL;

					out = malloc((size + 1) * sizeof *out);
					if (out == NULL)
						return NULL;

					/* same walk again, this time storing the units */
					decode_utf8_units(utf8, out);
					out[size] = 0;

					return out;
				}
				131
				132	int utf8_encode(const char from, char *to)
				133	{
				134	wchar_t *unicode;
				135	int wchars, err;
				136
				137	wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
				138	strlen(from), NULL, 0);
				139
				140	if(wchars == 0)
				141	{
				142	fprintf(stderr, "Unicode translation error %d\n", GetLastError());
				143	return -1;
				144	}
				145
				146	unicode = calloc(wchars + 1, sizeof(unsigned short));
				147	if(unicode == NULL)
				148	{
				149	fprintf(stderr, "Out of memory processing string to UTF8\n");
				150	return -1;
				151	}
				152
				153	err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
				154	strlen(from), unicode, wchars);
				155	if(err != wchars)
				156	{
				157	free(unicode);
				158	fprintf(stderr, "Unicode translation error %d\n", GetLastError());
				159	return -1;
				160	}
				161
				162	/* On NT-based windows systems, we could use WideCharToMultiByte(), but
				163	* MS doesn't actually have a consistent API across win32.
				164	*/
				165	*to = make_utf8_string(unicode);
				166
				167	free(unicode);
				168	return 0;
				169	}
				170
				171	int utf8_decode(const char from, char *to)
				172	{
				173	wchar_t *unicode;
				174	int chars, err;
				175
				176	/* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
				177	* MS doesn't actually have a consistent API across win32.
				178	*/
				179	unicode = make_unicode_string(from);
				180	if(unicode == NULL)
				181	{
				182	fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
				183	return -1;
				184	}
				185
				186	chars = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
				187	-1, NULL, 0, NULL, NULL);
				188
				189	if(chars == 0)
				190	{
				191	fprintf(stderr, "Unicode translation error %d\n", GetLastError());
				192	free(unicode);
				193	return -1;
				194	}
				195
				196	*to = calloc(chars + 1, sizeof(unsigned char));
				197	if(*to == NULL)
				198	{
				199	fprintf(stderr, "Out of memory processing string to local charset\n");
				200	free(unicode);
				201	return -1;
				202	}
				203
				204	err = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
				205	-1, *to, chars, NULL, NULL);
				206	if(err != chars)
				207	{
				208	fprintf(stderr, "Unicode translation error %d\n", GetLastError());
				209	free(unicode);
				210	free(*to);
				211	*to = NULL;
				212	return -1;
				213	}
				214
				215	free(unicode);
				216	return 0;
				217	}
				218
				219	#else /* End win32. Rest is for real operating systems */
				220
				221
				222	#ifdef HAVE_LANGINFO_CODESET
				223	#include <langinfo.h>
				224	#endif
				225
				/* Conversion routine called by convert_buffer() when HAVE_ICONV is defined. */
				int iconvert(const char *fromcode, const char *tocode,
						const char *from, size_t fromlen,
						char **to, size_t *tolen);

				/* strdup()ed name of the active charset, owned by convert_set_charset() */
				static char *current_charset = 0; /* means "US-ASCII" */
				231
				232	void convert_set_charset(const char *charset)
				233	{
				234
				235	#ifdef HAVE_LANGINFO_CODESET
				236	if (!charset)
				237	charset = nl_langinfo(CODESET);
				238	#endif
				239
				240	if (!charset)
				241	charset = getenv("CHARSET");
				242
				243	free(current_charset);
				244	current_charset = 0;
				245	if (charset && *charset)
				246	current_charset = strdup(charset);
				247	}
				248
				/*
				 * Convert a buffer from charset fromcode to charset tocode using
				 * whichever backend was selected at build time: iconvert() when
				 * HAVE_ICONV is defined, charset_convert() otherwise.
				 *
				 * to and tolen are handed to the backend unchanged.  Returns the
				 * backend's result, or -1 if no backend produced anything else.
				 */
				static int convert_buffer(const char *fromcode, const char *tocode,
						const char *from, size_t fromlen,
						char **to, size_t *tolen)
				{
					int ret = -1;

				#ifdef HAVE_ICONV
					ret = iconvert(fromcode, tocode, from, fromlen, to, tolen);
					if (ret != -1)
						return ret;
				#endif

				#ifndef HAVE_ICONV /* should be ifdef USE_CHARSET_CONVERT */
					ret = charset_convert(fromcode, tocode, from, fromlen, to, tolen);
					if (ret != -1)
						return ret;
				#endif

					return ret;
				}
				269
				/*
				 * Convert the NUL-terminated string from between two charsets and
				 * store a newly allocated result in *to.
				 *
				 * A convert_buffer() result of -2 is reported as -1; any result other
				 * than -1 is returned unchanged.  When convert_buffer() returns -1,
				 * the input is copied with every byte that has bits set outside 0x7f
				 * replaced by the replace character, and 3 is returned.  A failed
				 * allocation of that copy yields -1.
				 * NOTE(review): the meaning of the individual backend codes is defined
				 * by iconvert()/charset_convert() and is not visible here.
				 */
				static int convert_string(const char *fromcode, const char *tocode,
						const char *from, char **to, char replace)
				{
					int ret;
					size_t fromlen;
					char *s;

					fromlen = strlen(from);
					ret = convert_buffer(fromcode, tocode, from, fromlen, to, NULL);
					if (ret == -2)
						return -1;
					if (ret != -1)
						return ret;

					/* conversion unavailable: fall back to a 7-bit-clean copy */
					s = malloc(fromlen + 1);
					if (!s)
						return -1;
					memcpy(s, from, fromlen + 1);
					*to = s;
					for (; *s; s++)
						if (*s & ~0x7f)
							*s = replace;
					return 3;
				}
				294
				295	int utf8_encode(const char from, char *to)
				296	{
				297	char *charset;
				298
				299	if (!current_charset)
				300	convert_set_charset(0);
				301	charset = current_charset ? current_charset : "US-ASCII";
				302	return convert_string(charset, "UTF-8", from, to, '#');
				303	}
				304
				305	int utf8_decode(const char from, char *to)
				306	{
				307	char *charset;
				308
				309	if (!current_charset)
				310	convert_set_charset(0);
				311	charset = current_charset ? current_charset : "US-ASCII";
				312	return convert_string("UTF-8", charset, from, to, '?');
				313	}
				314
				315	#endif