Blame - Python/fileutils.c - platform/external/python/cpython3

blob: 53e8a470e952b30194ce85e1448a4c9a2c4813ce [file] [log] [blame]

Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	1	#include "Python.h"
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	2	#ifdef MS_WINDOWS
				3	# include <windows.h>
				4	#endif
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	5
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	6	#ifdef HAVE_LANGINFO_H
				7	#include <locale.h>
				8	#include <langinfo.h>
				9	#endif
				10
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	11	#ifdef __APPLE__
				12	extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
				13	#endif
				14
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	15	#if !defined(__APPLE__) && !defined(MS_WINDOWS)
				16	extern int _Py_normalize_encoding(const char , char , size_t);
				17
				18	/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
				19	On these operating systems, nl_langinfo(CODESET) announces an alias of the
				20	ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
				21	ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
				22	locale.getpreferredencoding() codec. For example, if command line arguments
				23	are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
				24	UnicodeEncodeError instead of retrieving the original byte string.
				25
				26	The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
				27	nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
				28	one byte in range 0x80-0xff can be decoded from the locale encoding. The
				29	workaround is also enabled on error, for example if getting the locale
				30	failed.
				31
				32	Values of locale_is_ascii:
				33
				34	1: the workaround is used: _Py_wchar2char() uses
				35	encode_ascii_surrogateescape() and _Py_char2wchar() uses
				36	decode_ascii_surrogateescape()
				37	0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
				38	_Py_char2wchar() uses mbstowcs()
				39	-1: unknown, need to call check_force_ascii() to get the value
				40	*/
				41	static int force_ascii = -1;
				42
				43	static int
				44	check_force_ascii(void)
				45	{
				46	char *loc;
				47	#if defined(HAVE_LANGINFO_H) && defined(CODESET)
				48	char codeset, *alias;
				49	char encoding[100];
				50	int is_ascii;
				51	unsigned int i;
				52	char* ascii_aliases[] = {
				53	"ascii",
				54	"646",
				55	"ansi-x3.4-1968",
				56	"ansi-x3-4-1968",
				57	"ansi-x3.4-1986",
				58	"cp367",
				59	"csascii",
				60	"ibm367",
				61	"iso646-us",
				62	"iso-646.irv-1991",
				63	"iso-ir-6",
				64	"us",
				65	"us-ascii",
				66	NULL
				67	};
				68	#endif
				69
				70	loc = setlocale(LC_CTYPE, NULL);
				71	if (loc == NULL)
				72	goto error;
				73	if (strcmp(loc, "C") != 0) {
				74	/* the LC_CTYPE locale is different than C */
				75	return 0;
				76	}
				77
				78	#if defined(HAVE_LANGINFO_H) && defined(CODESET)
				79	codeset = nl_langinfo(CODESET);
				80	if (!codeset \|\| codeset[0] == '\0') {
				81	/* CODESET is not set or empty */
				82	goto error;
				83	}
				84	if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
				85	goto error;
				86
				87	is_ascii = 0;
				88	for (alias=ascii_aliases; *alias != NULL; alias++) {
				89	if (strcmp(encoding, *alias) == 0) {
				90	is_ascii = 1;
				91	break;
				92	}
				93	}
				94	if (!is_ascii) {
				95	/* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
				96	return 0;
				97	}
				98
				99	for (i=0x80; i<0xff; i++) {
				100	unsigned char ch;
				101	wchar_t wch;
				102	size_t res;
				103
				104	ch = (unsigned char)i;
				105	res = mbstowcs(&wch, (char*)&ch, 1);
				106	if (res != (size_t)-1) {
				107	/* decoding a non-ASCII character from the locale encoding succeed:
				108	the locale encoding is not ASCII, force ASCII */
				109	return 1;
				110	}
				111	}
				112	/* None of the bytes in the range 0x80-0xff can be decoded from the locale
				113	encoding: the locale encoding is really ASCII */
				114	return 0;
				115	#else
				116	/* nl_langinfo(CODESET) is not available: always force ASCII */
				117	return 1;
				118	#endif
				119
				120	error:
				121	/* if an error occured, force the ASCII encoding */
				122	return 1;
				123	}
				124
				125	static char*
				126	encode_ascii_surrogateescape(const wchar_t text, size_t error_pos)
				127	{
				128	char result = NULL, out;
				129	size_t len, i;
				130	wchar_t ch;
				131
				132	if (error_pos != NULL)
				133	*error_pos = (size_t)-1;
				134
				135	len = wcslen(text);
				136
				137	result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
				138	if (result == NULL)
				139	return NULL;
				140
				141	out = result;
				142	for (i=0; i<len; i++) {
				143	ch = text[i];
				144
				145	if (ch <= 0x7f) {
				146	/* ASCII character */
				147	*out++ = (char)ch;
				148	}
				149	else if (0xdc80 <= ch && ch <= 0xdcff) {
				150	/* UTF-8b surrogate */
				151	*out++ = (char)(ch - 0xdc00);
				152	}
				153	else {
				154	if (error_pos != NULL)
				155	*error_pos = i;
				156	PyMem_Free(result);
				157	return NULL;
				158	}
				159	}
				160	*out = '\0';
				161	return result;
				162	}
				163	#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
				164
				165	#if !defined(__APPLE__) && (!defined(MS_WINDOWS) \|\| !defined(HAVE_MBRTOWC))
				166	static wchar_t*
				167	decode_ascii_surrogateescape(const char arg, size_t size)
				168	{
				169	wchar_t *res;
				170	unsigned char *in;
				171	wchar_t *out;
				172
				173	res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
				174	if (!res)
				175	return NULL;
				176
				177	in = (unsigned char*)arg;
				178	out = res;
				179	while(*in)
				180	if(*in < 128)
				181	out++ = in++;
				182	else
				183	out++ = 0xdc00 + in++;
				184	*out = 0;
				185	if (size != NULL)
				186	*size = out - res;
				187	return res;
				188	}
				189	#endif
				190
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	191
				192	/* Decode a byte string from the locale encoding with the
				193	surrogateescape error handler (undecodable bytes are decoded as characters
				194	in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
				195	character, escape the bytes using the surrogateescape error handler instead
				196	of decoding them.
				197
				198	Use _Py_wchar2char() to encode the character string back to a byte string.
				199
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	200	Return a pointer to a newly allocated wide character string (use
				201	PyMem_Free() to free the memory) and write the number of written wide
				202	characters excluding the null character into *size if size is not NULL, or
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	203	NULL on error (conversion or memory allocation error).
				204
				205	Conversion errors should never happen, unless there is a bug in the C
				206	library. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	207	wchar_t*
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	208	_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	209	{
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	210	#ifdef __APPLE__
				211	wchar_t *wstr;
				212	wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
				213	if (size != NULL) {
				214	if (wstr != NULL)
				215	*size = wcslen(wstr);
				216	else
				217	*size = (size_t)-1;
				218	}
				219	return wstr;
				220	#else
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	221	wchar_t *res;
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	222	size_t argsize;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	223	size_t count;
				224	unsigned char *in;
				225	wchar_t *out;
				226	#ifdef HAVE_MBRTOWC
				227	mbstate_t mbs;
				228	#endif
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	229
				230	#ifndef MS_WINDOWS
				231	if (force_ascii == -1)
				232	force_ascii = check_force_ascii();
				233
				234	if (force_ascii) {
				235	/* force ASCII encoding to workaround mbstowcs() issue */
				236	res = decode_ascii_surrogateescape(arg, size);
				237	if (res == NULL)
				238	goto oom;
				239	return res;
				240	}
				241	#endif
				242
				243	#ifdef HAVE_BROKEN_MBSTOWCS
				244	/* Some platforms have a broken implementation of
				245	* mbstowcs which does not count the characters that
				246	* would result from conversion. Use an upper bound.
				247	*/
				248	argsize = strlen(arg);
				249	#else
				250	argsize = mbstowcs(NULL, arg, 0);
				251	#endif
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	252	if (argsize != (size_t)-1) {
				253	res = (wchar_t )PyMem_Malloc((argsize+1)sizeof(wchar_t));
				254	if (!res)
				255	goto oom;
				256	count = mbstowcs(res, arg, argsize+1);
				257	if (count != (size_t)-1) {
				258	wchar_t *tmp;
				259	/* Only use the result if it contains no
				260	surrogate characters. */
				261	for (tmp = res; *tmp != 0 &&
				262	(tmp < 0xd800 \|\| tmp > 0xdfff); tmp++)
				263	;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	264	if (*tmp == 0) {
				265	if (size != NULL)
				266	*size = count;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	267	return res;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	268	}
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	269	}
				270	PyMem_Free(res);
				271	}
				272	/* Conversion failed. Fall back to escaping with surrogateescape. */
				273	#ifdef HAVE_MBRTOWC
				274	/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
				275
				276	/* Overallocate; as multi-byte characters are in the argument, the
				277	actual output could use less memory. */
				278	argsize = strlen(arg) + 1;
				279	res = (wchar_t)PyMem_Malloc(argsizesizeof(wchar_t));
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	280	if (!res)
				281	goto oom;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	282	in = (unsigned char*)arg;
				283	out = res;
				284	memset(&mbs, 0, sizeof mbs);
				285	while (argsize) {
				286	size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
				287	if (converted == 0)
				288	/* Reached end of string; null char stored. */
				289	break;
				290	if (converted == (size_t)-2) {
				291	/* Incomplete character. This should never happen,
				292	since we provide everything that we have -
				293	unless there is a bug in the C library, or I
				294	misunderstood how mbrtowc works. */
				295	fprintf(stderr, "unexpected mbrtowc result -2\n");
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	296	PyMem_Free(res);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	297	return NULL;
				298	}
				299	if (converted == (size_t)-1) {
				300	/* Conversion error. Escape as UTF-8b, and start over
				301	in the initial shift state. */
				302	out++ = 0xdc00 + in++;
				303	argsize--;
				304	memset(&mbs, 0, sizeof mbs);
				305	continue;
				306	}
				307	if (out >= 0xd800 && out <= 0xdfff) {
				308	/* Surrogate character. Escape the original
				309	byte sequence with surrogateescape. */
				310	argsize -= converted;
				311	while (converted--)
				312	out++ = 0xdc00 + in++;
				313	continue;
				314	}
				315	/* successfully converted some bytes */
				316	in += converted;
				317	argsize -= converted;
				318	out++;
				319	}
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	320	if (size != NULL)
				321	*size = out - res;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	322	#else /* HAVE_MBRTOWC */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	323	/* Cannot use C locale for escaping; manually escape as if charset
				324	is ASCII (i.e. escape all bytes > 128. This will still roundtrip
				325	correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	326	res = decode_ascii_surrogateescape(arg, size);
				327	if (res == NULL)
				328	goto oom;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	329	#endif /* HAVE_MBRTOWC */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	330	return res;
				331	oom:
				332	fprintf(stderr, "out of memory\n");
				333	return NULL;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	334	#endif /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	335	}
				336
				337	/* Encode a (wide) character string to the locale encoding with the
				338	surrogateescape error handler (characters in range U+DC80..U+DCFF are
				339	converted to bytes 0x80..0xFF).
				340
				341	This function is the reverse of _Py_char2wchar().
				342
				343	Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	344	the memory), or NULL on conversion or memory allocation error.
				345
				346	If error_pos is not NULL: *error_pos is the index of the invalid character
				347	on conversion error, or (size_t)-1 otherwise. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	348	char*
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	349	_Py_wchar2char(const wchar_t text, size_t error_pos)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	350	{
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	351	#ifdef __APPLE__
				352	Py_ssize_t len;
				353	PyObject unicode, bytes = NULL;
				354	char *cpath;
				355
				356	unicode = PyUnicode_FromWideChar(text, wcslen(text));
				357	if (unicode == NULL)
				358	return NULL;
				359
Victor Stinner	41a234a	2012-12-03 14:11:57 +0100	[diff] [blame]	360	bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
				361	PyUnicode_GET_SIZE(unicode),
				362	"surrogateescape");
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	363	Py_DECREF(unicode);
				364	if (bytes == NULL) {
				365	PyErr_Clear();
				366	if (error_pos != NULL)
				367	*error_pos = (size_t)-1;
				368	return NULL;
				369	}
				370
				371	len = PyBytes_GET_SIZE(bytes);
				372	cpath = PyMem_Malloc(len+1);
				373	if (cpath == NULL) {
				374	PyErr_Clear();
				375	Py_DECREF(bytes);
				376	if (error_pos != NULL)
				377	*error_pos = (size_t)-1;
				378	return NULL;
				379	}
				380	memcpy(cpath, PyBytes_AsString(bytes), len + 1);
				381	Py_DECREF(bytes);
				382	return cpath;
				383	#else /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	384	const size_t len = wcslen(text);
				385	char result = NULL, bytes = NULL;
				386	size_t i, size, converted;
				387	wchar_t c, buf[2];
				388
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	389	#ifndef MS_WINDOWS
				390	if (force_ascii == -1)
				391	force_ascii = check_force_ascii();
				392
				393	if (force_ascii)
				394	return encode_ascii_surrogateescape(text, error_pos);
				395	#endif
				396
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	397	/* The function works in two steps:
				398	1. compute the length of the output buffer in bytes (size)
				399	2. outputs the bytes */
				400	size = 0;
				401	buf[1] = 0;
				402	while (1) {
				403	for (i=0; i < len; i++) {
				404	c = text[i];
				405	if (c >= 0xdc80 && c <= 0xdcff) {
				406	/* UTF-8b surrogate */
				407	if (bytes != NULL) {
				408	*bytes++ = c - 0xdc00;
				409	size--;
				410	}
				411	else
				412	size++;
				413	continue;
				414	}
				415	else {
				416	buf[0] = c;
				417	if (bytes != NULL)
				418	converted = wcstombs(bytes, buf, size);
				419	else
				420	converted = wcstombs(NULL, buf, 0);
				421	if (converted == (size_t)-1) {
				422	if (result != NULL)
				423	PyMem_Free(result);
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	424	if (error_pos != NULL)
				425	*error_pos = i;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	426	return NULL;
				427	}
				428	if (bytes != NULL) {
				429	bytes += converted;
				430	size -= converted;
				431	}
				432	else
				433	size += converted;
				434	}
				435	}
				436	if (result != NULL) {
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	437	*bytes = '\0';
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	438	break;
				439	}
				440
				441	size += 1; /* nul byte at the end */
				442	result = PyMem_Malloc(size);
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	443	if (result == NULL) {
				444	if (error_pos != NULL)
				445	*error_pos = (size_t)-1;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	446	return NULL;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	447	}
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	448	bytes = result;
				449	}
				450	return result;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	451	#endif /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	452	}
				453
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	454	/* In principle, this should use HAVE__WSTAT, and _wstat
				455	should be detected by autoconf. However, no current
				456	POSIX system provides that function, so testing for
				457	it is pointless.
				458	Not sure whether the MS_WINDOWS guards are necessary:
				459	perhaps for cygwin/mingw builds?
				460	*/
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	461	#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	462
				463	/* Get file status. Encode the path to the locale encoding. */
				464
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	465	int
				466	_Py_wstat(const wchar_t* path, struct stat *buf)
				467	{
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	468	int err;
				469	char *fname;
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	470	fname = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	471	if (fname == NULL) {
				472	errno = EINVAL;
				473	return -1;
				474	}
				475	err = stat(fname, buf);
				476	PyMem_Free(fname);
				477	return err;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	478	}
				479	#endif
				480
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	481	#ifdef HAVE_STAT
				482
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	483	/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
				484	call stat() otherwise. Only fill st_mode attribute on Windows.
				485
				486	Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
				487	unicode error. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	488
				489	int
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	490	_Py_stat(PyObject path, struct stat statbuf)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	491	{
				492	#ifdef MS_WINDOWS
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	493	int err;
				494	struct _stat wstatbuf;
				495
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	496	err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	497	if (!err)
				498	statbuf->st_mode = wstatbuf.st_mode;
				499	return err;
				500	#else
				501	int ret;
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	502	PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	503	if (bytes == NULL)
				504	return -1;
				505	ret = stat(PyBytes_AS_STRING(bytes), statbuf);
				506	Py_DECREF(bytes);
				507	return ret;
				508	#endif
				509	}
				510
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	511	#endif
				512
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return the FILE* on success, or NULL on error. On non-Windows platforms,
   errno is set to EINVAL if the mode or the path cannot be encoded. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, sizeof(cmode));
    /* r == sizeof(cmode) means the buffer is full and not NUL-terminated */
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* report the encoding failure like the other wrappers do */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
				539
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	540	/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
				541	call fopen() otherwise.
				542
				543	Return the new file object on success, or NULL if the file cannot be open or
				544	(if PyErr_Occurred()) on unicode error */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	545
				546	FILE*
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	547	_Py_fopen(PyObject path, const char mode)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	548	{
				549	#ifdef MS_WINDOWS
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	550	wchar_t wmode[10];
				551	int usize;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	552
				553	usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
				554	if (usize == 0)
				555	return NULL;
				556
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	557	return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	558	#else
				559	FILE *f;
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	560	PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	561	if (bytes == NULL)
				562	return NULL;
				563	f = fopen(PyBytes_AS_STRING(bytes), mode);
				564	Py_DECREF(bytes);
				565	return f;
				566	#endif
				567	}
				568
				569	#ifdef HAVE_READLINK
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	570
				571	/* Read value of symbolic link. Encode the path to the locale encoding, decode
				572	the result from the locale encoding. */
				573
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	574	int
				575	_Py_wreadlink(const wchar_t path, wchar_t buf, size_t bufsiz)
				576	{
				577	char *cpath;
				578	char cbuf[PATH_MAX];
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	579	wchar_t *wbuf;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	580	int res;
				581	size_t r1;
				582
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	583	cpath = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	584	if (cpath == NULL) {
				585	errno = EINVAL;
				586	return -1;
				587	}
				588	res = (int)readlink(cpath, cbuf, PATH_MAX);
				589	PyMem_Free(cpath);
				590	if (res == -1)
				591	return -1;
				592	if (res == PATH_MAX) {
				593	errno = EINVAL;
				594	return -1;
				595	}
				596	cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	597	wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner	350147b	2010-10-16 22:52:09 +0000	[diff] [blame]	598	if (wbuf == NULL) {
				599	errno = EINVAL;
				600	return -1;
				601	}
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	602	if (bufsiz <= r1) {
				603	PyMem_Free(wbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	604	errno = EINVAL;
				605	return -1;
				606	}
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	607	wcsncpy(buf, wbuf, bufsiz);
				608	PyMem_Free(wbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	609	return (int)r1;
				610	}
				611	#endif
				612
				613	#ifdef HAVE_REALPATH
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	614
				615	/* Return the canonicalized absolute pathname. Encode path to the locale
				616	encoding, decode the result from the locale encoding. */
				617
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	618	wchar_t*
Victor Stinner	015f4d8	2010-10-07 22:29:53 +0000	[diff] [blame]	619	_Py_wrealpath(const wchar_t *path,
				620	wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	621	{
				622	char *cpath;
				623	char cresolved_path[PATH_MAX];
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	624	wchar_t *wresolved_path;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	625	char *res;
				626	size_t r;
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	627	cpath = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	628	if (cpath == NULL) {
				629	errno = EINVAL;
				630	return NULL;
				631	}
				632	res = realpath(cpath, cresolved_path);
				633	PyMem_Free(cpath);
				634	if (res == NULL)
				635	return NULL;
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	636
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	637	wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	638	if (wresolved_path == NULL) {
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	639	errno = EINVAL;
				640	return NULL;
				641	}
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	642	if (resolved_path_size <= r) {
				643	PyMem_Free(wresolved_path);
				644	errno = EINVAL;
				645	return NULL;
				646	}
				647	wcsncpy(resolved_path, wresolved_path, resolved_path_size);
				648	PyMem_Free(wresolved_path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	649	return resolved_path;
				650	}
				651	#endif
				652
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	653	/* Get the current directory. size is the buffer size in wide characters
				654	including the null character. Decode the path from the locale encoding. */
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	655
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	656	wchar_t*
				657	_Py_wgetcwd(wchar_t *buf, size_t size)
				658	{
				659	#ifdef MS_WINDOWS
				660	return _wgetcwd(buf, size);
				661	#else
				662	char fname[PATH_MAX];
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	663	wchar_t *wname;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	664	size_t len;
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	665
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	666	if (getcwd(fname, PATH_MAX) == NULL)
				667	return NULL;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	668	wname = _Py_char2wchar(fname, &len);
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	669	if (wname == NULL)
				670	return NULL;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	671	if (size <= len) {
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	672	PyMem_Free(wname);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	673	return NULL;
				674	}
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	675	wcsncpy(buf, wname, size);
				676	PyMem_Free(wname);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	677	return buf;
				678	#endif
				679	}
				680