Blame - Python/fileutils.c - platform/external/python/cpython3

blob: 7d08e0726a6b11f83e94120b9fafcb0a16ed7d04 [file] [log] [blame]

Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	1	#include "Python.h"
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	2	#ifdef MS_WINDOWS
				3	# include <windows.h>
				4	#endif
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	5
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	6	#ifdef HAVE_LANGINFO_H
				7	#include <locale.h>
				8	#include <langinfo.h>
				9	#endif
				10
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	11	#ifdef __APPLE__
				12	extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
				13	#endif
				14
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	15	#if !defined(__APPLE__) && !defined(MS_WINDOWS)
				16	extern int _Py_normalize_encoding(const char , char , size_t);
				17
				18	/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
				19	On these operating systems, nl_langinfo(CODESET) announces an alias of the
				20	ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
				21	ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
				22	locale.getpreferredencoding() codec. For example, if command line arguments
				23	are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
				24	UnicodeEncodeError instead of retrieving the original byte string.
				25
				26	The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
				27	nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
				28	one byte in range 0x80-0xff can be decoded from the locale encoding. The
				29	workaround is also enabled on error, for example if getting the locale
				30	failed.
				31
				32	Values of force_ascii:
				33
				34	1: the workaround is used: _Py_wchar2char() uses
				35	encode_ascii_surrogateescape() and _Py_char2wchar() uses
				36	decode_ascii_surrogateescape()
				37	0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
				38	_Py_char2wchar() uses mbstowcs()
				39	-1: unknown, need to call check_force_ascii() to get the value
				40	*/
				41	static int force_ascii = -1;
				42
				43	static int
				44	check_force_ascii(void)
				45	{
				46	char *loc;
				47	#if defined(HAVE_LANGINFO_H) && defined(CODESET)
				48	char codeset, *alias;
				49	char encoding[100];
				50	int is_ascii;
				51	unsigned int i;
				52	char* ascii_aliases[] = {
				53	"ascii",
				54	"646",
				55	"ansi-x3.4-1968",
				56	"ansi-x3-4-1968",
				57	"ansi-x3.4-1986",
				58	"cp367",
				59	"csascii",
				60	"ibm367",
				61	"iso646-us",
				62	"iso-646.irv-1991",
				63	"iso-ir-6",
				64	"us",
				65	"us-ascii",
				66	NULL
				67	};
				68	#endif
				69
				70	loc = setlocale(LC_CTYPE, NULL);
				71	if (loc == NULL)
				72	goto error;
				73	if (strcmp(loc, "C") != 0) {
				74	/* the LC_CTYPE locale is different than C */
				75	return 0;
				76	}
				77
				78	#if defined(HAVE_LANGINFO_H) && defined(CODESET)
				79	codeset = nl_langinfo(CODESET);
				80	if (!codeset \|\| codeset[0] == '\0') {
				81	/* CODESET is not set or empty */
				82	goto error;
				83	}
				84	if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
				85	goto error;
				86
				87	is_ascii = 0;
				88	for (alias=ascii_aliases; *alias != NULL; alias++) {
				89	if (strcmp(encoding, *alias) == 0) {
				90	is_ascii = 1;
				91	break;
				92	}
				93	}
				94	if (!is_ascii) {
				95	/* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
				96	return 0;
				97	}
				98
				99	for (i=0x80; i<0xff; i++) {
				100	unsigned char ch;
				101	wchar_t wch;
				102	size_t res;
				103
				104	ch = (unsigned char)i;
				105	res = mbstowcs(&wch, (char*)&ch, 1);
				106	if (res != (size_t)-1) {
				107	/* decoding a non-ASCII character from the locale encoding succeed:
				108	the locale encoding is not ASCII, force ASCII */
				109	return 1;
				110	}
				111	}
				112	/* None of the bytes in the range 0x80-0xff can be decoded from the locale
				113	encoding: the locale encoding is really ASCII */
				114	return 0;
				115	#else
				116	/* nl_langinfo(CODESET) is not available: always force ASCII */
				117	return 1;
				118	#endif
				119
				120	error:
				121	/* if an error occured, force the ASCII encoding */
				122	return 1;
				123	}
				124
				125	static char*
				126	encode_ascii_surrogateescape(const wchar_t text, size_t error_pos)
				127	{
				128	char result = NULL, out;
				129	size_t len, i;
				130	wchar_t ch;
				131
				132	if (error_pos != NULL)
				133	*error_pos = (size_t)-1;
				134
				135	len = wcslen(text);
				136
				137	result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
				138	if (result == NULL)
				139	return NULL;
				140
				141	out = result;
				142	for (i=0; i<len; i++) {
				143	ch = text[i];
				144
				145	if (ch <= 0x7f) {
				146	/* ASCII character */
				147	*out++ = (char)ch;
				148	}
				149	else if (0xdc80 <= ch && ch <= 0xdcff) {
				150	/* UTF-8b surrogate */
				151	*out++ = (char)(ch - 0xdc00);
				152	}
				153	else {
				154	if (error_pos != NULL)
				155	*error_pos = i;
				156	PyMem_Free(result);
				157	return NULL;
				158	}
				159	}
				160	*out = '\0';
				161	return result;
				162	}
				163	#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
				164
				165	#if !defined(__APPLE__) && (!defined(MS_WINDOWS) \|\| !defined(HAVE_MBRTOWC))
				166	static wchar_t*
				167	decode_ascii_surrogateescape(const char arg, size_t size)
				168	{
				169	wchar_t *res;
				170	unsigned char *in;
				171	wchar_t *out;
Benjamin Peterson	f18bf6f	2015-01-04 16:03:17 -0600	[diff] [blame^]	172	size_t argsize = strlen(arg) + 1;
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	173
Benjamin Peterson	f18bf6f	2015-01-04 16:03:17 -0600	[diff] [blame^]	174	if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
				175	return NULL;
				176	res = PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	177	if (!res)
				178	return NULL;
				179
				180	in = (unsigned char*)arg;
				181	out = res;
				182	while(*in)
				183	if(*in < 128)
				184	out++ = in++;
				185	else
				186	out++ = 0xdc00 + in++;
				187	*out = 0;
				188	if (size != NULL)
				189	*size = out - res;
				190	return res;
				191	}
				192	#endif
				193
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	194
				195	/* Decode a byte string from the locale encoding with the
				196	surrogateescape error handler (undecodable bytes are decoded as characters
				197	in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
				198	character, escape the bytes using the surrogateescape error handler instead
				199	of decoding them.
				200
				201	Use _Py_wchar2char() to encode the character string back to a byte string.
				202
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	203	Return a pointer to a newly allocated wide character string (use
				204	PyMem_Free() to free the memory) and write the number of written wide
				205	characters excluding the null character into *size if size is not NULL, or
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	206	NULL on error (conversion or memory allocation error).
				207
				208	Conversion errors should never happen, unless there is a bug in the C
				209	library. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	210	wchar_t*
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	211	_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	212	{
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	213	#ifdef __APPLE__
				214	wchar_t *wstr;
				215	wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
				216	if (size != NULL) {
				217	if (wstr != NULL)
				218	*size = wcslen(wstr);
				219	else
				220	*size = (size_t)-1;
				221	}
				222	return wstr;
				223	#else
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	224	wchar_t *res;
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	225	size_t argsize;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	226	size_t count;
				227	unsigned char *in;
				228	wchar_t *out;
				229	#ifdef HAVE_MBRTOWC
				230	mbstate_t mbs;
				231	#endif
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	232
				233	#ifndef MS_WINDOWS
				234	if (force_ascii == -1)
				235	force_ascii = check_force_ascii();
				236
				237	if (force_ascii) {
				238	/* force ASCII encoding to workaround mbstowcs() issue */
				239	res = decode_ascii_surrogateescape(arg, size);
				240	if (res == NULL)
				241	goto oom;
				242	return res;
				243	}
				244	#endif
				245
				246	#ifdef HAVE_BROKEN_MBSTOWCS
				247	/* Some platforms have a broken implementation of
				248	* mbstowcs which does not count the characters that
				249	* would result from conversion. Use an upper bound.
				250	*/
				251	argsize = strlen(arg);
				252	#else
				253	argsize = mbstowcs(NULL, arg, 0);
				254	#endif
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	255	if (argsize != (size_t)-1) {
Benjamin Peterson	f18bf6f	2015-01-04 16:03:17 -0600	[diff] [blame^]	256	if (argsize == PY_SSIZE_T_MAX)
				257	goto oom;
				258	argsize += 1;
				259	if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
				260	goto oom;
				261	res = (wchar_t )PyMem_Malloc(argsizesizeof(wchar_t));
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	262	if (!res)
				263	goto oom;
Benjamin Peterson	f18bf6f	2015-01-04 16:03:17 -0600	[diff] [blame^]	264	count = mbstowcs(res, arg, argsize);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	265	if (count != (size_t)-1) {
				266	wchar_t *tmp;
				267	/* Only use the result if it contains no
				268	surrogate characters. */
				269	for (tmp = res; *tmp != 0 &&
				270	(tmp < 0xd800 \|\| tmp > 0xdfff); tmp++)
				271	;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	272	if (*tmp == 0) {
				273	if (size != NULL)
				274	*size = count;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	275	return res;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	276	}
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	277	}
				278	PyMem_Free(res);
				279	}
				280	/* Conversion failed. Fall back to escaping with surrogateescape. */
				281	#ifdef HAVE_MBRTOWC
				282	/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
				283
				284	/* Overallocate; as multi-byte characters are in the argument, the
				285	actual output could use less memory. */
				286	argsize = strlen(arg) + 1;
Benjamin Peterson	f18bf6f	2015-01-04 16:03:17 -0600	[diff] [blame^]	287	if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
				288	goto oom;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	289	res = (wchar_t)PyMem_Malloc(argsizesizeof(wchar_t));
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	290	if (!res)
				291	goto oom;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	292	in = (unsigned char*)arg;
				293	out = res;
				294	memset(&mbs, 0, sizeof mbs);
				295	while (argsize) {
				296	size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
				297	if (converted == 0)
				298	/* Reached end of string; null char stored. */
				299	break;
				300	if (converted == (size_t)-2) {
				301	/* Incomplete character. This should never happen,
				302	since we provide everything that we have -
				303	unless there is a bug in the C library, or I
				304	misunderstood how mbrtowc works. */
				305	fprintf(stderr, "unexpected mbrtowc result -2\n");
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	306	PyMem_Free(res);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	307	return NULL;
				308	}
				309	if (converted == (size_t)-1) {
				310	/* Conversion error. Escape as UTF-8b, and start over
				311	in the initial shift state. */
				312	out++ = 0xdc00 + in++;
				313	argsize--;
				314	memset(&mbs, 0, sizeof mbs);
				315	continue;
				316	}
				317	if (out >= 0xd800 && out <= 0xdfff) {
				318	/* Surrogate character. Escape the original
				319	byte sequence with surrogateescape. */
				320	argsize -= converted;
				321	while (converted--)
				322	out++ = 0xdc00 + in++;
				323	continue;
				324	}
				325	/* successfully converted some bytes */
				326	in += converted;
				327	argsize -= converted;
				328	out++;
				329	}
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	330	if (size != NULL)
				331	*size = out - res;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	332	#else /* HAVE_MBRTOWC */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	333	/* Cannot use C locale for escaping; manually escape as if charset
				334	is ASCII (i.e. escape all bytes > 128. This will still roundtrip
				335	correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	336	res = decode_ascii_surrogateescape(arg, size);
				337	if (res == NULL)
				338	goto oom;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	339	#endif /* HAVE_MBRTOWC */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	340	return res;
				341	oom:
				342	fprintf(stderr, "out of memory\n");
				343	return NULL;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	344	#endif /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	345	}
				346
				347	/* Encode a (wide) character string to the locale encoding with the
				348	surrogateescape error handler (characters in range U+DC80..U+DCFF are
				349	converted to bytes 0x80..0xFF).
				350
				351	This function is the reverse of _Py_char2wchar().
				352
				353	Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	354	the memory), or NULL on conversion or memory allocation error.
				355
				356	If error_pos is not NULL: *error_pos is the index of the invalid character
				357	on conversion error, or (size_t)-1 otherwise. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	358	char*
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	359	_Py_wchar2char(const wchar_t text, size_t error_pos)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	360	{
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	361	#ifdef __APPLE__
				362	Py_ssize_t len;
				363	PyObject unicode, bytes = NULL;
				364	char *cpath;
				365
				366	unicode = PyUnicode_FromWideChar(text, wcslen(text));
				367	if (unicode == NULL)
				368	return NULL;
				369
Victor Stinner	41a234a	2012-12-03 14:11:57 +0100	[diff] [blame]	370	bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
				371	PyUnicode_GET_SIZE(unicode),
				372	"surrogateescape");
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	373	Py_DECREF(unicode);
				374	if (bytes == NULL) {
				375	PyErr_Clear();
				376	if (error_pos != NULL)
				377	*error_pos = (size_t)-1;
				378	return NULL;
				379	}
				380
				381	len = PyBytes_GET_SIZE(bytes);
				382	cpath = PyMem_Malloc(len+1);
				383	if (cpath == NULL) {
				384	PyErr_Clear();
				385	Py_DECREF(bytes);
				386	if (error_pos != NULL)
				387	*error_pos = (size_t)-1;
				388	return NULL;
				389	}
				390	memcpy(cpath, PyBytes_AsString(bytes), len + 1);
				391	Py_DECREF(bytes);
				392	return cpath;
				393	#else /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	394	const size_t len = wcslen(text);
				395	char result = NULL, bytes = NULL;
				396	size_t i, size, converted;
				397	wchar_t c, buf[2];
				398
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	399	#ifndef MS_WINDOWS
				400	if (force_ascii == -1)
				401	force_ascii = check_force_ascii();
				402
				403	if (force_ascii)
				404	return encode_ascii_surrogateescape(text, error_pos);
				405	#endif
				406
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	407	/* The function works in two steps:
				408	1. compute the length of the output buffer in bytes (size)
				409	2. outputs the bytes */
				410	size = 0;
				411	buf[1] = 0;
				412	while (1) {
				413	for (i=0; i < len; i++) {
				414	c = text[i];
				415	if (c >= 0xdc80 && c <= 0xdcff) {
				416	/* UTF-8b surrogate */
				417	if (bytes != NULL) {
				418	*bytes++ = c - 0xdc00;
				419	size--;
				420	}
				421	else
				422	size++;
				423	continue;
				424	}
				425	else {
				426	buf[0] = c;
				427	if (bytes != NULL)
				428	converted = wcstombs(bytes, buf, size);
				429	else
				430	converted = wcstombs(NULL, buf, 0);
				431	if (converted == (size_t)-1) {
				432	if (result != NULL)
				433	PyMem_Free(result);
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	434	if (error_pos != NULL)
				435	*error_pos = i;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	436	return NULL;
				437	}
				438	if (bytes != NULL) {
				439	bytes += converted;
				440	size -= converted;
				441	}
				442	else
				443	size += converted;
				444	}
				445	}
				446	if (result != NULL) {
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	447	*bytes = '\0';
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	448	break;
				449	}
				450
				451	size += 1; /* nul byte at the end */
				452	result = PyMem_Malloc(size);
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	453	if (result == NULL) {
				454	if (error_pos != NULL)
				455	*error_pos = (size_t)-1;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	456	return NULL;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	457	}
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	458	bytes = result;
				459	}
				460	return result;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	461	#endif /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	462	}
				463
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	464	/* In principle, this should use HAVE__WSTAT, and _wstat
				465	should be detected by autoconf. However, no current
				466	POSIX system provides that function, so testing for
				467	it is pointless.
				468	Not sure whether the MS_WINDOWS guards are necessary:
				469	perhaps for cygwin/mingw builds?
				470	*/
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	471	#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	472
				473	/* Get file status. Encode the path to the locale encoding. */
				474
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded_path = _Py_wchar2char(path, NULL);

    /* the path cannot be encoded: report an invalid argument */
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    status = stat(encoded_path, buf);
    PyMem_Free(encoded_path);
    return status;
}
				489	#endif
				490
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	491	#ifdef HAVE_STAT
				492
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	493	/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
				494	call stat() otherwise. Only fill st_mode attribute on Windows.
				495
				496	Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
				497	unicode error. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	498
				499	int
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	500	_Py_stat(PyObject path, struct stat statbuf)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	501	{
				502	#ifdef MS_WINDOWS
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	503	int err;
				504	struct _stat wstatbuf;
				505
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	506	err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	507	if (!err)
				508	statbuf->st_mode = wstatbuf.st_mode;
				509	return err;
				510	#else
				511	int ret;
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	512	PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	513	if (bytes == NULL)
				514	return -1;
				515	ret = stat(PyBytes_AS_STRING(bytes), statbuf);
				516	Py_DECREF(bytes);
				517	return ret;
				518	#endif
				519	}
				520
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	521	#endif
				522
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return the stream, or NULL on error. Outside Windows, errno is set to
   EINVAL if the mode or the path cannot be encoded. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, sizeof(cmode));
    /* r == sizeof(cmode) means that the result is not NUL-terminated */
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* same convention as _Py_wstat() and _Py_wreadlink() */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
				549
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	550	/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
				551	call fopen() otherwise.
				552
				553	Return the new file object on success, or NULL if the file cannot be open or
				554	(if PyErr_Occurred()) on unicode error */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	555
				556	FILE*
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	557	_Py_fopen(PyObject path, const char mode)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	558	{
				559	#ifdef MS_WINDOWS
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	560	wchar_t wmode[10];
				561	int usize;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	562
				563	usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
				564	if (usize == 0)
				565	return NULL;
				566
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	567	return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	568	#else
				569	FILE *f;
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	570	PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	571	if (bytes == NULL)
				572	return NULL;
				573	f = fopen(PyBytes_AS_STRING(bytes), mode);
				574	Py_DECREF(bytes);
				575	return f;
				576	#endif
				577	}
				578
				579	#ifdef HAVE_READLINK
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	580
				581	/* Read value of symbolic link. Encode the path to the locale encoding, decode
				582	the result from the locale encoding. */
				583
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	584	int
				585	_Py_wreadlink(const wchar_t path, wchar_t buf, size_t bufsiz)
				586	{
				587	char *cpath;
				588	char cbuf[PATH_MAX];
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	589	wchar_t *wbuf;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	590	int res;
				591	size_t r1;
				592
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	593	cpath = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	594	if (cpath == NULL) {
				595	errno = EINVAL;
				596	return -1;
				597	}
				598	res = (int)readlink(cpath, cbuf, PATH_MAX);
				599	PyMem_Free(cpath);
				600	if (res == -1)
				601	return -1;
				602	if (res == PATH_MAX) {
				603	errno = EINVAL;
				604	return -1;
				605	}
				606	cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	607	wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner	350147b	2010-10-16 22:52:09 +0000	[diff] [blame]	608	if (wbuf == NULL) {
				609	errno = EINVAL;
				610	return -1;
				611	}
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	612	if (bufsiz <= r1) {
				613	PyMem_Free(wbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	614	errno = EINVAL;
				615	return -1;
				616	}
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	617	wcsncpy(buf, wbuf, bufsiz);
				618	PyMem_Free(wbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	619	return (int)r1;
				620	}
				621	#endif
				622
				623	#ifdef HAVE_REALPATH
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	624
				625	/* Return the canonicalized absolute pathname. Encode path to the locale
				626	encoding, decode the result from the locale encoding. */
				627
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	628	wchar_t*
Victor Stinner	015f4d8	2010-10-07 22:29:53 +0000	[diff] [blame]	629	_Py_wrealpath(const wchar_t *path,
				630	wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	631	{
				632	char *cpath;
				633	char cresolved_path[PATH_MAX];
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	634	wchar_t *wresolved_path;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	635	char *res;
				636	size_t r;
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	637	cpath = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	638	if (cpath == NULL) {
				639	errno = EINVAL;
				640	return NULL;
				641	}
				642	res = realpath(cpath, cresolved_path);
				643	PyMem_Free(cpath);
				644	if (res == NULL)
				645	return NULL;
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	646
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	647	wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	648	if (wresolved_path == NULL) {
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	649	errno = EINVAL;
				650	return NULL;
				651	}
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	652	if (resolved_path_size <= r) {
				653	PyMem_Free(wresolved_path);
				654	errno = EINVAL;
				655	return NULL;
				656	}
				657	wcsncpy(resolved_path, wresolved_path, resolved_path_size);
				658	PyMem_Free(wresolved_path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	659	return resolved_path;
				660	}
				661	#endif
				662
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	663	/* Get the current directory. size is the buffer size in wide characters
				664	including the null character. Decode the path from the locale encoding. */
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	665
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	666	wchar_t*
				667	_Py_wgetcwd(wchar_t *buf, size_t size)
				668	{
				669	#ifdef MS_WINDOWS
				670	return _wgetcwd(buf, size);
				671	#else
				672	char fname[PATH_MAX];
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	673	wchar_t *wname;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	674	size_t len;
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	675
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	676	if (getcwd(fname, PATH_MAX) == NULL)
				677	return NULL;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	678	wname = _Py_char2wchar(fname, &len);
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	679	if (wname == NULL)
				680	return NULL;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	681	if (size <= len) {
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	682	PyMem_Free(wname);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	683	return NULL;
				684	}
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	685	wcsncpy(buf, wname, size);
				686	PyMem_Free(wname);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	687	return buf;
				688	#endif
				689	}
				690