Blame - Python/fileutils.c - platform/external/python/cpython3

blob: d25111f4bb451713f3965d00e2807d52caf139c5 [file] [log] [blame]

Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	1	#include "Python.h"
Stefan Krah	6df5cae	2012-11-12 20:14:36 +0100	[diff] [blame]	2	#include "osdefs.h"
Stefan Krah	6c01e38	2014-01-20 15:31:08 +0100	[diff] [blame^]	3	#include <locale.h>
				4
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	5	#ifdef MS_WINDOWS
				6	# include <windows.h>
				7	#endif
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	8
Brett Cannon	efb00c0	2012-02-29 18:31:31 -0500	[diff] [blame]	9	#ifdef HAVE_LANGINFO_H
				10	#include <langinfo.h>
				11	#endif
				12
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	13	#ifdef __APPLE__
				14	extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
				15	#endif
				16
Brett Cannon	efb00c0	2012-02-29 18:31:31 -0500	[diff] [blame]	17	PyObject *
				18	_Py_device_encoding(int fd)
				19	{
				20	#if defined(MS_WINDOWS) \|\| defined(MS_WIN64)
				21	UINT cp;
				22	#endif
				23	if (!_PyVerify_fd(fd) \|\| !isatty(fd)) {
				24	Py_RETURN_NONE;
				25	}
				26	#if defined(MS_WINDOWS) \|\| defined(MS_WIN64)
				27	if (fd == 0)
				28	cp = GetConsoleCP();
				29	else if (fd == 1 \|\| fd == 2)
				30	cp = GetConsoleOutputCP();
				31	else
				32	cp = 0;
				33	/* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
				34	has no console */
				35	if (cp != 0)
				36	return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
				37	#elif defined(CODESET)
				38	{
				39	char *codeset = nl_langinfo(CODESET);
				40	if (codeset != NULL && codeset[0] != 0)
				41	return PyUnicode_FromString(codeset);
				42	}
				43	#endif
				44	Py_RETURN_NONE;
				45	}
				46
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	47	#if !defined(__APPLE__) && !defined(MS_WINDOWS)
				48	extern int _Py_normalize_encoding(const char , char , size_t);
				49
				50	/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
				51	On these operating systems, nl_langinfo(CODESET) announces an alias of the
				52	ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
				53	ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
				54	locale.getpreferredencoding() codec. For example, if command line arguments
				55	are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
				56	UnicodeEncodeError instead of retrieving the original byte string.
				57
				58	The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
				59	nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
				60	one byte in range 0x80-0xff can be decoded from the locale encoding. The
				61	workaround is also enabled on error, for example if getting the locale
				62	failed.
				63
				64	Values of locale_is_ascii:
				65
				66	1: the workaround is used: _Py_wchar2char() uses
				67	encode_ascii_surrogateescape() and _Py_char2wchar() uses
				68	decode_ascii_surrogateescape()
				69	0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
				70	_Py_char2wchar() uses mbstowcs()
				71	-1: unknown, need to call check_force_ascii() to get the value
				72	*/
				73	static int force_ascii = -1;
				74
				75	static int
				76	check_force_ascii(void)
				77	{
				78	char *loc;
				79	#if defined(HAVE_LANGINFO_H) && defined(CODESET)
				80	char codeset, *alias;
				81	char encoding[100];
				82	int is_ascii;
				83	unsigned int i;
				84	char* ascii_aliases[] = {
				85	"ascii",
				86	"646",
				87	"ansi-x3.4-1968",
				88	"ansi-x3-4-1968",
				89	"ansi-x3.4-1986",
				90	"cp367",
				91	"csascii",
				92	"ibm367",
				93	"iso646-us",
				94	"iso-646.irv-1991",
				95	"iso-ir-6",
				96	"us",
				97	"us-ascii",
				98	NULL
				99	};
				100	#endif
				101
				102	loc = setlocale(LC_CTYPE, NULL);
				103	if (loc == NULL)
				104	goto error;
				105	if (strcmp(loc, "C") != 0) {
				106	/* the LC_CTYPE locale is different than C */
				107	return 0;
				108	}
				109
				110	#if defined(HAVE_LANGINFO_H) && defined(CODESET)
				111	codeset = nl_langinfo(CODESET);
				112	if (!codeset \|\| codeset[0] == '\0') {
				113	/* CODESET is not set or empty */
				114	goto error;
				115	}
				116	if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
				117	goto error;
				118
				119	is_ascii = 0;
				120	for (alias=ascii_aliases; *alias != NULL; alias++) {
				121	if (strcmp(encoding, *alias) == 0) {
				122	is_ascii = 1;
				123	break;
				124	}
				125	}
				126	if (!is_ascii) {
				127	/* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
				128	return 0;
				129	}
				130
				131	for (i=0x80; i<0xff; i++) {
				132	unsigned char ch;
				133	wchar_t wch;
				134	size_t res;
				135
				136	ch = (unsigned char)i;
				137	res = mbstowcs(&wch, (char*)&ch, 1);
				138	if (res != (size_t)-1) {
				139	/* decoding a non-ASCII character from the locale encoding succeed:
				140	the locale encoding is not ASCII, force ASCII */
				141	return 1;
				142	}
				143	}
				144	/* None of the bytes in the range 0x80-0xff can be decoded from the locale
				145	encoding: the locale encoding is really ASCII */
				146	return 0;
				147	#else
				148	/* nl_langinfo(CODESET) is not available: always force ASCII */
				149	return 1;
				150	#endif
				151
				152	error:
				153	/* if an error occured, force the ASCII encoding */
				154	return 1;
				155	}
				156
				157	static char*
				158	encode_ascii_surrogateescape(const wchar_t text, size_t error_pos)
				159	{
				160	char result = NULL, out;
				161	size_t len, i;
				162	wchar_t ch;
				163
				164	if (error_pos != NULL)
				165	*error_pos = (size_t)-1;
				166
				167	len = wcslen(text);
				168
				169	result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
				170	if (result == NULL)
				171	return NULL;
				172
				173	out = result;
				174	for (i=0; i<len; i++) {
				175	ch = text[i];
				176
				177	if (ch <= 0x7f) {
				178	/* ASCII character */
				179	*out++ = (char)ch;
				180	}
				181	else if (0xdc80 <= ch && ch <= 0xdcff) {
				182	/* UTF-8b surrogate */
				183	*out++ = (char)(ch - 0xdc00);
				184	}
				185	else {
				186	if (error_pos != NULL)
				187	*error_pos = i;
				188	PyMem_Free(result);
				189	return NULL;
				190	}
				191	}
				192	*out = '\0';
				193	return result;
				194	}
				195	#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
				196
				197	#if !defined(__APPLE__) && (!defined(MS_WINDOWS) \|\| !defined(HAVE_MBRTOWC))
				198	static wchar_t*
				199	decode_ascii_surrogateescape(const char arg, size_t size)
				200	{
				201	wchar_t *res;
				202	unsigned char *in;
				203	wchar_t *out;
				204
				205	res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
				206	if (!res)
				207	return NULL;
				208
				209	in = (unsigned char*)arg;
				210	out = res;
				211	while(*in)
				212	if(*in < 128)
				213	out++ = in++;
				214	else
				215	out++ = 0xdc00 + in++;
				216	*out = 0;
				217	if (size != NULL)
				218	*size = out - res;
				219	return res;
				220	}
				221	#endif
				222
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	223
				224	/* Decode a byte string from the locale encoding with the
				225	surrogateescape error handler (undecodable bytes are decoded as characters
				226	in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
				227	character, escape the bytes using the surrogateescape error handler instead
				228	of decoding them.
				229
				230	Use _Py_wchar2char() to encode the character string back to a byte string.
				231
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	232	Return a pointer to a newly allocated wide character string (use
				233	PyMem_Free() to free the memory) and write the number of written wide
				234	characters excluding the null character into *size if size is not NULL, or
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	235	NULL on error (decoding or memory allocation error). If size is not NULL,
				236	*size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
				237	error.
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	238
				239	Conversion errors should never happen, unless there is a bug in the C
				240	library. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	241	wchar_t*
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	242	_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	243	{
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	244	#ifdef __APPLE__
				245	wchar_t *wstr;
				246	wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
				247	if (size != NULL) {
				248	if (wstr != NULL)
				249	*size = wcslen(wstr);
				250	else
				251	*size = (size_t)-1;
				252	}
				253	return wstr;
				254	#else
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	255	wchar_t *res;
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	256	size_t argsize;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	257	size_t count;
				258	unsigned char *in;
				259	wchar_t *out;
				260	#ifdef HAVE_MBRTOWC
				261	mbstate_t mbs;
				262	#endif
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	263
				264	#ifndef MS_WINDOWS
				265	if (force_ascii == -1)
				266	force_ascii = check_force_ascii();
				267
				268	if (force_ascii) {
				269	/* force ASCII encoding to workaround mbstowcs() issue */
				270	res = decode_ascii_surrogateescape(arg, size);
				271	if (res == NULL)
				272	goto oom;
				273	return res;
				274	}
				275	#endif
				276
				277	#ifdef HAVE_BROKEN_MBSTOWCS
				278	/* Some platforms have a broken implementation of
				279	* mbstowcs which does not count the characters that
				280	* would result from conversion. Use an upper bound.
				281	*/
				282	argsize = strlen(arg);
				283	#else
				284	argsize = mbstowcs(NULL, arg, 0);
				285	#endif
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	286	if (argsize != (size_t)-1) {
				287	res = (wchar_t )PyMem_Malloc((argsize+1)sizeof(wchar_t));
				288	if (!res)
				289	goto oom;
				290	count = mbstowcs(res, arg, argsize+1);
				291	if (count != (size_t)-1) {
				292	wchar_t *tmp;
				293	/* Only use the result if it contains no
				294	surrogate characters. */
				295	for (tmp = res; *tmp != 0 &&
				296	(tmp < 0xd800 \|\| tmp > 0xdfff); tmp++)
				297	;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	298	if (*tmp == 0) {
				299	if (size != NULL)
				300	*size = count;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	301	return res;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	302	}
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	303	}
				304	PyMem_Free(res);
				305	}
				306	/* Conversion failed. Fall back to escaping with surrogateescape. */
				307	#ifdef HAVE_MBRTOWC
				308	/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
				309
				310	/* Overallocate; as multi-byte characters are in the argument, the
				311	actual output could use less memory. */
				312	argsize = strlen(arg) + 1;
				313	res = (wchar_t)PyMem_Malloc(argsizesizeof(wchar_t));
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	314	if (!res)
				315	goto oom;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	316	in = (unsigned char*)arg;
				317	out = res;
				318	memset(&mbs, 0, sizeof mbs);
				319	while (argsize) {
				320	size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
				321	if (converted == 0)
				322	/* Reached end of string; null char stored. */
				323	break;
				324	if (converted == (size_t)-2) {
				325	/* Incomplete character. This should never happen,
				326	since we provide everything that we have -
				327	unless there is a bug in the C library, or I
				328	misunderstood how mbrtowc works. */
Victor Stinner	19de4c3	2010-11-08 23:30:46 +0000	[diff] [blame]	329	PyMem_Free(res);
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	330	if (size != NULL)
				331	*size = (size_t)-2;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	332	return NULL;
				333	}
				334	if (converted == (size_t)-1) {
				335	/* Conversion error. Escape as UTF-8b, and start over
				336	in the initial shift state. */
				337	out++ = 0xdc00 + in++;
				338	argsize--;
				339	memset(&mbs, 0, sizeof mbs);
				340	continue;
				341	}
				342	if (out >= 0xd800 && out <= 0xdfff) {
				343	/* Surrogate character. Escape the original
				344	byte sequence with surrogateescape. */
				345	argsize -= converted;
				346	while (converted--)
				347	out++ = 0xdc00 + in++;
				348	continue;
				349	}
				350	/* successfully converted some bytes */
				351	in += converted;
				352	argsize -= converted;
				353	out++;
				354	}
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	355	if (size != NULL)
				356	*size = out - res;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	357	#else /* HAVE_MBRTOWC */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	358	/* Cannot use C locale for escaping; manually escape as if charset
				359	is ASCII (i.e. escape all bytes > 128. This will still roundtrip
				360	correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	361	res = decode_ascii_surrogateescape(arg, size);
				362	if (res == NULL)
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	363	goto oom;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	364	#endif /* HAVE_MBRTOWC */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	365	return res;
				366	oom:
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	367	if (size != NULL)
				368	*size = (size_t)-1;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	369	return NULL;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	370	#endif /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	371	}
				372
				373	/* Encode a (wide) character string to the locale encoding with the
				374	surrogateescape error handler (characters in range U+DC80..U+DCFF are
				375	converted to bytes 0x80..0xFF).
				376
				377	This function is the reverse of _Py_char2wchar().
				378
				379	Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	380	the memory), or NULL on encoding or memory allocation error.
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	381
				382	If error_pos is not NULL: *error_pos is the index of the invalid character
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	383	on encoding error, or (size_t)-1 otherwise. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	384	char*
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	385	_Py_wchar2char(const wchar_t text, size_t error_pos)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	386	{
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	387	#ifdef __APPLE__
				388	Py_ssize_t len;
				389	PyObject unicode, bytes = NULL;
				390	char *cpath;
				391
				392	unicode = PyUnicode_FromWideChar(text, wcslen(text));
				393	if (unicode == NULL)
				394	return NULL;
				395
				396	bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
				397	Py_DECREF(unicode);
				398	if (bytes == NULL) {
				399	PyErr_Clear();
				400	if (error_pos != NULL)
				401	*error_pos = (size_t)-1;
				402	return NULL;
				403	}
				404
				405	len = PyBytes_GET_SIZE(bytes);
				406	cpath = PyMem_Malloc(len+1);
				407	if (cpath == NULL) {
				408	PyErr_Clear();
				409	Py_DECREF(bytes);
				410	if (error_pos != NULL)
				411	*error_pos = (size_t)-1;
				412	return NULL;
				413	}
				414	memcpy(cpath, PyBytes_AsString(bytes), len + 1);
				415	Py_DECREF(bytes);
				416	return cpath;
				417	#else /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	418	const size_t len = wcslen(text);
				419	char result = NULL, bytes = NULL;
				420	size_t i, size, converted;
				421	wchar_t c, buf[2];
				422
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	423	#ifndef MS_WINDOWS
				424	if (force_ascii == -1)
				425	force_ascii = check_force_ascii();
				426
				427	if (force_ascii)
				428	return encode_ascii_surrogateescape(text, error_pos);
				429	#endif
				430
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	431	/* The function works in two steps:
				432	1. compute the length of the output buffer in bytes (size)
				433	2. outputs the bytes */
				434	size = 0;
				435	buf[1] = 0;
				436	while (1) {
				437	for (i=0; i < len; i++) {
				438	c = text[i];
				439	if (c >= 0xdc80 && c <= 0xdcff) {
				440	/* UTF-8b surrogate */
				441	if (bytes != NULL) {
				442	*bytes++ = c - 0xdc00;
				443	size--;
				444	}
				445	else
				446	size++;
				447	continue;
				448	}
				449	else {
				450	buf[0] = c;
				451	if (bytes != NULL)
				452	converted = wcstombs(bytes, buf, size);
				453	else
				454	converted = wcstombs(NULL, buf, 0);
				455	if (converted == (size_t)-1) {
				456	if (result != NULL)
				457	PyMem_Free(result);
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	458	if (error_pos != NULL)
				459	*error_pos = i;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	460	return NULL;
				461	}
				462	if (bytes != NULL) {
				463	bytes += converted;
				464	size -= converted;
				465	}
				466	else
				467	size += converted;
				468	}
				469	}
				470	if (result != NULL) {
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	471	*bytes = '\0';
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	472	break;
				473	}
				474
				475	size += 1; /* nul byte at the end */
				476	result = PyMem_Malloc(size);
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	477	if (result == NULL) {
				478	if (error_pos != NULL)
				479	*error_pos = (size_t)-1;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	480	return NULL;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	481	}
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	482	bytes = result;
				483	}
				484	return result;
Victor Stinner	27b1ca2	2012-12-03 12:47:59 +0100	[diff] [blame]	485	#endif /* __APPLE__ */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	486	}
				487
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	488	/* In principle, this should use HAVE__WSTAT, and _wstat
				489	should be detected by autoconf. However, no current
				490	POSIX system provides that function, so testing for
				491	it is pointless.
				492	Not sure whether the MS_WINDOWS guards are necessary:
				493	perhaps for cygwin/mingw builds?
				494	*/
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	495	#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	496
				497	/* Get file status. Encode the path to the locale encoding. */
				498
Victor Stinner	b306d75	2010-10-07 22:09:40 +0000	[diff] [blame]	499	int
				500	_Py_wstat(const wchar_t* path, struct stat *buf)
				501	{
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	502	int err;
				503	char *fname;
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	504	fname = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	505	if (fname == NULL) {
				506	errno = EINVAL;
				507	return -1;
				508	}
				509	err = stat(fname, buf);
				510	PyMem_Free(fname);
				511	return err;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	512	}
				513	#endif
				514
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	515	#ifdef HAVE_STAT
				516
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	517	/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
				518	call stat() otherwise. Only fill st_mode attribute on Windows.
				519
Victor Stinner	bd0850b	2011-12-18 20:47:30 +0100	[diff] [blame]	520	Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
				521	raised. */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	522
				523	int
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	524	_Py_stat(PyObject path, struct stat statbuf)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	525	{
				526	#ifdef MS_WINDOWS
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	527	int err;
				528	struct _stat wstatbuf;
Victor Stinner	ee587ea	2011-11-17 00:51:38 +0100	[diff] [blame]	529	wchar_t *wpath;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	530
Victor Stinner	ee587ea	2011-11-17 00:51:38 +0100	[diff] [blame]	531	wpath = PyUnicode_AsUnicode(path);
				532	if (wpath == NULL)
Victor Stinner	bd0850b	2011-12-18 20:47:30 +0100	[diff] [blame]	533	return -2;
Victor Stinner	ee587ea	2011-11-17 00:51:38 +0100	[diff] [blame]	534	err = _wstat(wpath, &wstatbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	535	if (!err)
				536	statbuf->st_mode = wstatbuf.st_mode;
				537	return err;
				538	#else
				539	int ret;
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	540	PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	541	if (bytes == NULL)
Victor Stinner	bd0850b	2011-12-18 20:47:30 +0100	[diff] [blame]	542	return -2;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	543	ret = stat(PyBytes_AS_STRING(bytes), statbuf);
				544	Py_DECREF(bytes);
				545	return ret;
				546	#endif
				547	}
				548
Victor Stinner	20b654a	2013-01-03 01:08:58 +0100	[diff] [blame]	549	#endif
				550
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return the FILE pointer, or NULL on error.  Outside Windows, errno is set
   to EINVAL if the mode does not fit in the internal buffer or if the mode
   or the path cannot be encoded. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, sizeof(cmode));
    /* r == sizeof(cmode) means the buffer was filled without room for the
       terminating NUL byte */
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* report the failure like the other wide character wrappers */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
				577
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	578	/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
				579	call fopen() otherwise.
				580
				581	Return the new file object on success, or NULL if the file cannot be open or
				582	(if PyErr_Occurred()) on unicode error */
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	583
				584	FILE*
Victor Stinner	a4a7595	2010-10-07 22:23:10 +0000	[diff] [blame]	585	_Py_fopen(PyObject path, const char mode)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	586	{
				587	#ifdef MS_WINDOWS
Victor Stinner	ee587ea	2011-11-17 00:51:38 +0100	[diff] [blame]	588	wchar_t *wpath;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	589	wchar_t wmode[10];
				590	int usize;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	591
Antoine Pitrou	0e576f1	2011-12-22 10:03:38 +0100	[diff] [blame]	592	if (!PyUnicode_Check(path)) {
				593	PyErr_Format(PyExc_TypeError,
				594	"str file path expected under Windows, got %R",
				595	Py_TYPE(path));
				596	return NULL;
				597	}
Victor Stinner	ee587ea	2011-11-17 00:51:38 +0100	[diff] [blame]	598	wpath = PyUnicode_AsUnicode(path);
				599	if (wpath == NULL)
				600	return NULL;
				601
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	602	usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
				603	if (usize == 0)
				604	return NULL;
				605
Victor Stinner	ee587ea	2011-11-17 00:51:38 +0100	[diff] [blame]	606	return _wfopen(wpath, wmode);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	607	#else
				608	FILE *f;
Antoine Pitrou	2b1cc89	2011-12-19 18:19:06 +0100	[diff] [blame]	609	PyObject *bytes;
				610	if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	611	return NULL;
				612	f = fopen(PyBytes_AS_STRING(bytes), mode);
				613	Py_DECREF(bytes);
				614	return f;
				615	#endif
				616	}
				617
				618	#ifdef HAVE_READLINK
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	619
				620	/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	621	the result from the locale encoding. Return -1 on error. */
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	622
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	623	int
				624	_Py_wreadlink(const wchar_t path, wchar_t buf, size_t bufsiz)
				625	{
				626	char *cpath;
Victor Stinner	b11d6cb	2013-11-15 18:14:11 +0100	[diff] [blame]	627	char cbuf[MAXPATHLEN];
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	628	wchar_t *wbuf;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	629	int res;
				630	size_t r1;
				631
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	632	cpath = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	633	if (cpath == NULL) {
				634	errno = EINVAL;
				635	return -1;
				636	}
Victor Stinner	b11d6cb	2013-11-15 18:14:11 +0100	[diff] [blame]	637	res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	638	PyMem_Free(cpath);
				639	if (res == -1)
				640	return -1;
Victor Stinner	b11d6cb	2013-11-15 18:14:11 +0100	[diff] [blame]	641	if (res == Py_ARRAY_LENGTH(cbuf)) {
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	642	errno = EINVAL;
				643	return -1;
				644	}
				645	cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	646	wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner	350147b	2010-10-16 22:52:09 +0000	[diff] [blame]	647	if (wbuf == NULL) {
				648	errno = EINVAL;
				649	return -1;
				650	}
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	651	if (bufsiz <= r1) {
				652	PyMem_Free(wbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	653	errno = EINVAL;
				654	return -1;
				655	}
Victor Stinner	3f711f4	2010-10-16 22:47:37 +0000	[diff] [blame]	656	wcsncpy(buf, wbuf, bufsiz);
				657	PyMem_Free(wbuf);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	658	return (int)r1;
				659	}
				660	#endif
				661
				662	#ifdef HAVE_REALPATH
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	663
				664	/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	665	encoding, decode the result from the locale encoding.
				666	Return NULL on error. */
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	667
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	668	wchar_t*
Victor Stinner	015f4d8	2010-10-07 22:29:53 +0000	[diff] [blame]	669	_Py_wrealpath(const wchar_t *path,
				670	wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	671	{
				672	char *cpath;
Victor Stinner	b11d6cb	2013-11-15 18:14:11 +0100	[diff] [blame]	673	char cresolved_path[MAXPATHLEN];
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	674	wchar_t *wresolved_path;
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	675	char *res;
				676	size_t r;
Victor Stinner	2f02a51	2010-11-08 22:43:46 +0000	[diff] [blame]	677	cpath = _Py_wchar2char(path, NULL);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	678	if (cpath == NULL) {
				679	errno = EINVAL;
				680	return NULL;
				681	}
				682	res = realpath(cpath, cresolved_path);
				683	PyMem_Free(cpath);
				684	if (res == NULL)
				685	return NULL;
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	686
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	687	wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	688	if (wresolved_path == NULL) {
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	689	errno = EINVAL;
				690	return NULL;
				691	}
Victor Stinner	0a1b8cb	2010-10-16 22:55:47 +0000	[diff] [blame]	692	if (resolved_path_size <= r) {
				693	PyMem_Free(wresolved_path);
				694	errno = EINVAL;
				695	return NULL;
				696	}
				697	wcsncpy(resolved_path, wresolved_path, resolved_path_size);
				698	PyMem_Free(wresolved_path);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	699	return resolved_path;
				700	}
				701	#endif
				702
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	703	/* Get the current directory. size is the buffer size in wide characters
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	704	including the null character. Decode the path from the locale encoding.
				705	Return NULL on error. */
Victor Stinner	6672d0c	2010-10-07 22:53:43 +0000	[diff] [blame]	706
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	707	wchar_t*
				708	_Py_wgetcwd(wchar_t *buf, size_t size)
				709	{
				710	#ifdef MS_WINDOWS
				711	return _wgetcwd(buf, size);
				712	#else
Victor Stinner	b11d6cb	2013-11-15 18:14:11 +0100	[diff] [blame]	713	char fname[MAXPATHLEN];
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	714	wchar_t *wname;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	715	size_t len;
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	716
Victor Stinner	b11d6cb	2013-11-15 18:14:11 +0100	[diff] [blame]	717	if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	718	return NULL;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	719	wname = _Py_char2wchar(fname, &len);
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	720	if (wname == NULL)
				721	return NULL;
Victor Stinner	168e117	2010-10-16 23:16:16 +0000	[diff] [blame]	722	if (size <= len) {
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	723	PyMem_Free(wname);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	724	return NULL;
				725	}
Victor Stinner	f4061da	2010-10-14 12:37:19 +0000	[diff] [blame]	726	wcsncpy(buf, wname, size);
				727	PyMem_Free(wname);
Victor Stinner	4e31443	2010-10-07 21:45:39 +0000	[diff] [blame]	728	return buf;
				729	#endif
				730	}
				731