blob: 065d3fd974191c1f70536b94fd6eaa1a131e0125 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
6# include <windows.h>
7#endif
Victor Stinner4e314432010-10-07 21:45:39 +00008
Brett Cannonefb00c02012-02-29 18:31:31 -05009#ifdef HAVE_LANGINFO_H
10#include <langinfo.h>
11#endif
12
Victor Stinnerdaf45552013-08-28 00:53:59 +020013#ifdef HAVE_SYS_IOCTL_H
14#include <sys/ioctl.h>
15#endif
16
17#ifdef HAVE_FCNTL_H
18#include <fcntl.h>
19#endif /* HAVE_FCNTL_H */
20
Victor Stinnere2623772012-11-12 23:04:02 +010021#ifdef __APPLE__
22extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
23#endif
24
Victor Stinnerdaf45552013-08-28 00:53:59 +020025#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() supports O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), io.FileIO and os.open() */
33int _Py_open_cloexec_works = -1;
34#endif
35
Brett Cannonefb00c02012-02-29 18:31:31 -050036PyObject *
37_Py_device_encoding(int fd)
38{
Victor Stinner14b9b112013-06-25 00:37:25 +020039#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050040 UINT cp;
41#endif
42 if (!_PyVerify_fd(fd) || !isatty(fd)) {
43 Py_RETURN_NONE;
44 }
Victor Stinner14b9b112013-06-25 00:37:25 +020045#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050046 if (fd == 0)
47 cp = GetConsoleCP();
48 else if (fd == 1 || fd == 2)
49 cp = GetConsoleOutputCP();
50 else
51 cp = 0;
52 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
53 has no console */
54 if (cp != 0)
55 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
56#elif defined(CODESET)
57 {
58 char *codeset = nl_langinfo(CODESET);
59 if (codeset != NULL && codeset[0] != 0)
60 return PyUnicode_FromString(codeset);
61 }
62#endif
63 Py_RETURN_NONE;
64}
65
Victor Stinnerd45c7f82012-12-04 01:34:47 +010066#if !defined(__APPLE__) && !defined(MS_WINDOWS)
67extern int _Py_normalize_encoding(const char *, char *, size_t);
68
/* Work around a FreeBSD and OpenIndiana locale encoding issue with the C
   locale. On these operating systems, nl_langinfo(CODESET) announces an alias
   of the ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   Values of force_ascii:

    1: the workaround is used: _Py_wchar2char() uses
       encode_ascii_surrogateescape() and _Py_char2wchar() uses
       decode_ascii_surrogateescape()
    0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
       _Py_char2wchar() uses mbstowcs()
   -1: unknown, need to call check_force_ascii() to get the value
*/
92static int force_ascii = -1;
93
/* Decide whether the ASCII workaround must be used for the current LC_CTYPE
   locale.

   Return 0 if the LC_CTYPE locale is not "C", if nl_langinfo(CODESET) is not
   ASCII (or one of its aliases), or if no byte in range 0x80-0xff can be
   decoded by mbstowcs() (the locale encoding is really ASCII).
   Return 1 if at least one such byte can be decoded, if nl_langinfo(CODESET)
   is not available, or if an error occurred. */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    char *codeset;
    const char * const *alias;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        /* NUL-terminated so that mbstowcs() can never read past the probed
           byte, even if the byte is the lead byte of a multibyte sequence */
        unsigned char ch[2];
        wchar_t wch;
        size_t res;

        ch[0] = (unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, (char*)ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
175
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters U+DC80..U+DCFF are written as bytes 0x80..0xFF.

   Return a NUL-terminated buffer allocated with PyMem_Malloc(), or NULL on
   memory allocation failure or if a character cannot be encoded.

   If error_pos is not NULL, *error_pos is set to the index of the first
   character that cannot be encoded, or to (size_t)-1 otherwise. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    const wchar_t *src;
    char *buffer, *dst;

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    /* one output byte per input character, +1 for the NUL byte */
    buffer = PyMem_Malloc(wcslen(text) + 1);
    if (buffer == NULL)
        return NULL;

    dst = buffer;
    for (src = text; *src != 0; src++) {
        wchar_t ch = *src;

        if (0xdc80 <= ch && ch <= 0xdcff) {
            /* UTF-8b surrogate: restore the original byte */
            *dst++ = (char)(ch - 0xdc00);
            continue;
        }
        if (ch > 0x7f) {
            /* neither ASCII nor an escaped byte: not encodable */
            if (error_pos != NULL)
                *error_pos = (size_t)(src - text);
            PyMem_Free(buffer);
            return NULL;
        }
        /* ASCII character */
        *dst++ = (char)ch;
    }
    *dst = '\0';
    return buffer;
}
214#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
215
216#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
/* Decode a byte string from ASCII with the surrogateescape error handler:
   bytes 0x80..0xFF are decoded as characters U+DC80..U+DCFF.

   Return a NUL-terminated wide character string allocated with
   PyMem_RawMalloc(), or NULL if the buffer cannot be allocated (including
   when its size in bytes would not fit in a size_t). On success, if size is
   not NULL, *size is set to the number of wide characters written, excluding
   the null character. *size is not modified on failure. */
static wchar_t*
decode_ascii_surrogateescape(const char *arg, size_t *size)
{
    wchar_t *res;
    const unsigned char *in;
    wchar_t *out;
    size_t argsize;

    /* one wide character per input byte, +1 for the null character */
    argsize = strlen(arg) + 1;
    /* refuse lengths for which argsize * sizeof(wchar_t) would wrap around
       and lead to an undersized allocation */
    if (argsize > (size_t)-1 / sizeof(wchar_t))
        return NULL;
    res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
    if (!res)
        return NULL;

    in = (const unsigned char*)arg;
    out = res;
    while (*in) {
        if (*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    }
    *out = 0;
    if (size != NULL)
        *size = out - res;
    return res;
}
240#endif
241
Victor Stinner4e314432010-10-07 21:45:39 +0000242
243/* Decode a byte string from the locale encoding with the
244 surrogateescape error handler (undecodable bytes are decoded as characters
245 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
246 character, escape the bytes using the surrogateescape error handler instead
247 of decoding them.
248
249 Use _Py_wchar2char() to encode the character string back to a byte string.
250
Victor Stinner168e1172010-10-16 23:16:16 +0000251 Return a pointer to a newly allocated wide character string (use
Victor Stinner1a7425f2013-07-07 16:25:15 +0200252 PyMem_RawFree() to free the memory) and write the number of written wide
Victor Stinner168e1172010-10-16 23:16:16 +0000253 characters excluding the null character into *size if size is not NULL, or
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100254 NULL on error (decoding or memory allocation error). If size is not NULL,
255 *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
256 error.
Victor Stinner19de4c32010-11-08 23:30:46 +0000257
258 Conversion errors should never happen, unless there is a bug in the C
259 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000260wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000261_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000262{
Victor Stinnere2623772012-11-12 23:04:02 +0100263#ifdef __APPLE__
264 wchar_t *wstr;
265 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100266 if (size != NULL) {
267 if (wstr != NULL)
268 *size = wcslen(wstr);
269 else
270 *size = (size_t)-1;
271 }
Victor Stinnere2623772012-11-12 23:04:02 +0100272 return wstr;
273#else
Victor Stinner4e314432010-10-07 21:45:39 +0000274 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100275 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000276 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200277#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000278 unsigned char *in;
279 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000280 mbstate_t mbs;
281#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100282
283#ifndef MS_WINDOWS
284 if (force_ascii == -1)
285 force_ascii = check_force_ascii();
286
287 if (force_ascii) {
288 /* force ASCII encoding to workaround mbstowcs() issue */
289 res = decode_ascii_surrogateescape(arg, size);
290 if (res == NULL)
291 goto oom;
292 return res;
293 }
294#endif
295
296#ifdef HAVE_BROKEN_MBSTOWCS
297 /* Some platforms have a broken implementation of
298 * mbstowcs which does not count the characters that
299 * would result from conversion. Use an upper bound.
300 */
301 argsize = strlen(arg);
302#else
303 argsize = mbstowcs(NULL, arg, 0);
304#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000305 if (argsize != (size_t)-1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200306 res = (wchar_t *)PyMem_RawMalloc((argsize+1)*sizeof(wchar_t));
Victor Stinner4e314432010-10-07 21:45:39 +0000307 if (!res)
308 goto oom;
309 count = mbstowcs(res, arg, argsize+1);
310 if (count != (size_t)-1) {
311 wchar_t *tmp;
312 /* Only use the result if it contains no
313 surrogate characters. */
314 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100315 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000316 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000317 if (*tmp == 0) {
318 if (size != NULL)
319 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000320 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000321 }
Victor Stinner4e314432010-10-07 21:45:39 +0000322 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200323 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000324 }
325 /* Conversion failed. Fall back to escaping with surrogateescape. */
326#ifdef HAVE_MBRTOWC
327 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
328
329 /* Overallocate; as multi-byte characters are in the argument, the
330 actual output could use less memory. */
331 argsize = strlen(arg) + 1;
Victor Stinner1a7425f2013-07-07 16:25:15 +0200332 res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000333 if (!res)
334 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000335 in = (unsigned char*)arg;
336 out = res;
337 memset(&mbs, 0, sizeof mbs);
338 while (argsize) {
339 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
340 if (converted == 0)
341 /* Reached end of string; null char stored. */
342 break;
343 if (converted == (size_t)-2) {
344 /* Incomplete character. This should never happen,
345 since we provide everything that we have -
346 unless there is a bug in the C library, or I
347 misunderstood how mbrtowc works. */
Victor Stinner1a7425f2013-07-07 16:25:15 +0200348 PyMem_RawFree(res);
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100349 if (size != NULL)
350 *size = (size_t)-2;
Victor Stinner4e314432010-10-07 21:45:39 +0000351 return NULL;
352 }
353 if (converted == (size_t)-1) {
354 /* Conversion error. Escape as UTF-8b, and start over
355 in the initial shift state. */
356 *out++ = 0xdc00 + *in++;
357 argsize--;
358 memset(&mbs, 0, sizeof mbs);
359 continue;
360 }
Victor Stinner76df43d2012-10-30 01:42:39 +0100361 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000362 /* Surrogate character. Escape the original
363 byte sequence with surrogateescape. */
364 argsize -= converted;
365 while (converted--)
366 *out++ = 0xdc00 + *in++;
367 continue;
368 }
369 /* successfully converted some bytes */
370 in += converted;
371 argsize -= converted;
372 out++;
373 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100374 if (size != NULL)
375 *size = out - res;
Victor Stinnere2623772012-11-12 23:04:02 +0100376#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000377 /* Cannot use C locale for escaping; manually escape as if charset
378 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
379 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100380 res = decode_ascii_surrogateescape(arg, size);
381 if (res == NULL)
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100382 goto oom;
Victor Stinnere2623772012-11-12 23:04:02 +0100383#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000384 return res;
385oom:
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100386 if (size != NULL)
387 *size = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000388 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100389#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000390}
391
392/* Encode a (wide) character string to the locale encoding with the
393 surrogateescape error handler (characters in range U+DC80..U+DCFF are
394 converted to bytes 0x80..0xFF).
395
396 This function is the reverse of _Py_char2wchar().
397
398 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100399 the memory), or NULL on encoding or memory allocation error.
Victor Stinner2f02a512010-11-08 22:43:46 +0000400
401 If error_pos is not NULL: *error_pos is the index of the invalid character
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100402 on encoding error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000403char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000404_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000405{
Victor Stinnere2623772012-11-12 23:04:02 +0100406#ifdef __APPLE__
407 Py_ssize_t len;
408 PyObject *unicode, *bytes = NULL;
409 char *cpath;
410
411 unicode = PyUnicode_FromWideChar(text, wcslen(text));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100412 if (unicode == NULL)
Victor Stinnere2623772012-11-12 23:04:02 +0100413 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100414
415 bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
416 Py_DECREF(unicode);
417 if (bytes == NULL) {
418 PyErr_Clear();
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100419 if (error_pos != NULL)
420 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100421 return NULL;
422 }
423
424 len = PyBytes_GET_SIZE(bytes);
425 cpath = PyMem_Malloc(len+1);
426 if (cpath == NULL) {
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100427 PyErr_Clear();
Victor Stinnere2623772012-11-12 23:04:02 +0100428 Py_DECREF(bytes);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100429 if (error_pos != NULL)
430 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100431 return NULL;
432 }
433 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
434 Py_DECREF(bytes);
435 return cpath;
436#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000437 const size_t len = wcslen(text);
438 char *result = NULL, *bytes = NULL;
439 size_t i, size, converted;
440 wchar_t c, buf[2];
441
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100442#ifndef MS_WINDOWS
443 if (force_ascii == -1)
444 force_ascii = check_force_ascii();
445
446 if (force_ascii)
447 return encode_ascii_surrogateescape(text, error_pos);
448#endif
449
Victor Stinner4e314432010-10-07 21:45:39 +0000450 /* The function works in two steps:
451 1. compute the length of the output buffer in bytes (size)
452 2. outputs the bytes */
453 size = 0;
454 buf[1] = 0;
455 while (1) {
456 for (i=0; i < len; i++) {
457 c = text[i];
458 if (c >= 0xdc80 && c <= 0xdcff) {
459 /* UTF-8b surrogate */
460 if (bytes != NULL) {
461 *bytes++ = c - 0xdc00;
462 size--;
463 }
464 else
465 size++;
466 continue;
467 }
468 else {
469 buf[0] = c;
470 if (bytes != NULL)
471 converted = wcstombs(bytes, buf, size);
472 else
473 converted = wcstombs(NULL, buf, 0);
474 if (converted == (size_t)-1) {
475 if (result != NULL)
476 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000477 if (error_pos != NULL)
478 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000479 return NULL;
480 }
481 if (bytes != NULL) {
482 bytes += converted;
483 size -= converted;
484 }
485 else
486 size += converted;
487 }
488 }
489 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100490 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000491 break;
492 }
493
494 size += 1; /* nul byte at the end */
495 result = PyMem_Malloc(size);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100496 if (result == NULL) {
497 if (error_pos != NULL)
498 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000499 return NULL;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100500 }
Victor Stinner4e314432010-10-07 21:45:39 +0000501 bytes = result;
502 }
503 return result;
Victor Stinnere2623772012-11-12 23:04:02 +0100504#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000505}
506
/* In principle, this should use HAVE__WSTAT, and _wstat
   should be detected by autoconf. However, no current
   POSIX system provides that function, so testing for
   it is pointless.
   Not sure whether the MS_WINDOWS guards are necessary:
   perhaps for cygwin/mingw builds?
*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000514#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000515
/* Get file status. Encode the path to the locale encoding.

   Return the result of stat(). If the path cannot be encoded, set errno to
   EINVAL and return -1. The errno value set by stat() is preserved across
   the release of the temporary encoded path. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int err;
    int saved_errno;
    char *fname;

    fname = _Py_wchar2char(path, NULL);
    if (fname == NULL) {
        errno = EINVAL;
        return -1;
    }
    err = stat(fname, buf);
    /* releasing memory may modify errno: keep the value set by stat() */
    saved_errno = errno;
    PyMem_Free(fname);
    errno = saved_errno;
    return err;
}
532#endif
533
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100534#ifdef HAVE_STAT
535
Victor Stinner6672d0c2010-10-07 22:53:43 +0000536/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
537 call stat() otherwise. Only fill st_mode attribute on Windows.
538
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100539 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
540 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000541
542int
Victor Stinnera4a75952010-10-07 22:23:10 +0000543_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000544{
545#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000546 int err;
547 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100548 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000549
Victor Stinneree587ea2011-11-17 00:51:38 +0100550 wpath = PyUnicode_AsUnicode(path);
551 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100552 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100553 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000554 if (!err)
555 statbuf->st_mode = wstatbuf.st_mode;
556 return err;
557#else
558 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000559 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000560 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100561 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000562 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
563 Py_DECREF(bytes);
564 return ret;
565#endif
566}
567
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100568#endif
569
Antoine Pitrou409b5382013-10-12 22:41:17 +0200570static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200571get_inheritable(int fd, int raise)
572{
573#ifdef MS_WINDOWS
574 HANDLE handle;
575 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000576
Victor Stinnerdaf45552013-08-28 00:53:59 +0200577 if (!_PyVerify_fd(fd)) {
578 if (raise)
579 PyErr_SetFromErrno(PyExc_OSError);
580 return -1;
581 }
582
583 handle = (HANDLE)_get_osfhandle(fd);
584 if (handle == INVALID_HANDLE_VALUE) {
585 if (raise)
586 PyErr_SetFromWindowsErr(0);
587 return -1;
588 }
589
590 if (!GetHandleInformation(handle, &flags)) {
591 if (raise)
592 PyErr_SetFromWindowsErr(0);
593 return -1;
594 }
595
596 return (flags & HANDLE_FLAG_INHERIT);
597#else
598 int flags;
599
600 flags = fcntl(fd, F_GETFD, 0);
601 if (flags == -1) {
602 if (raise)
603 PyErr_SetFromErrno(PyExc_OSError);
604 return -1;
605 }
606 return !(flags & FD_CLOEXEC);
607#endif
608}
609
/* Get the inheritable flag of the specified file descriptor.
   The result comes from get_inheritable() called with raise=1: non-zero if
   the file descriptor can be inherited, 0 if it cannot; on error an
   exception is raised and -1 is returned. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
618
619static int
620set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
621{
622#ifdef MS_WINDOWS
623 HANDLE handle;
624 DWORD flags;
625#elif defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
626 int request;
627 int err;
628#elif defined(HAVE_FCNTL_H)
629 int flags;
630 int res;
631#endif
632
633 /* atomic_flag_works can only be used to make the file descriptor
634 non-inheritable */
635 assert(!(atomic_flag_works != NULL && inheritable));
636
637 if (atomic_flag_works != NULL && !inheritable) {
638 if (*atomic_flag_works == -1) {
639 int inheritable = get_inheritable(fd, raise);
640 if (inheritable == -1)
641 return -1;
642 *atomic_flag_works = !inheritable;
643 }
644
645 if (*atomic_flag_works)
646 return 0;
647 }
648
649#ifdef MS_WINDOWS
650 if (!_PyVerify_fd(fd)) {
651 if (raise)
652 PyErr_SetFromErrno(PyExc_OSError);
653 return -1;
654 }
655
656 handle = (HANDLE)_get_osfhandle(fd);
657 if (handle == INVALID_HANDLE_VALUE) {
658 if (raise)
659 PyErr_SetFromWindowsErr(0);
660 return -1;
661 }
662
663 if (inheritable)
664 flags = HANDLE_FLAG_INHERIT;
665 else
666 flags = 0;
667 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
668 if (raise)
669 PyErr_SetFromWindowsErr(0);
670 return -1;
671 }
672 return 0;
673
674#elif defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
675 if (inheritable)
676 request = FIONCLEX;
677 else
678 request = FIOCLEX;
Stefan Krah49d04792013-11-14 15:35:47 +0100679 err = ioctl(fd, request, NULL);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200680 if (err) {
681 if (raise)
682 PyErr_SetFromErrno(PyExc_OSError);
683 return -1;
684 }
685 return 0;
686
687#else
688 flags = fcntl(fd, F_GETFD);
689 if (flags < 0) {
690 if (raise)
691 PyErr_SetFromErrno(PyExc_OSError);
692 return -1;
693 }
694
695 if (inheritable)
696 flags &= ~FD_CLOEXEC;
697 else
698 flags |= FD_CLOEXEC;
699 res = fcntl(fd, F_SETFD, flags);
700 if (res < 0) {
701 if (raise)
702 PyErr_SetFromErrno(PyExc_OSError);
703 return -1;
704 }
705 return 0;
706#endif
707}
708
/* Make the file descriptor non-inheritable without raising an exception:
   calls set_inheritable() with inheritable=0, raise=0 and no atomic flag.
   Return 0 on success, -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
716
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, read the current flag of the file
      descriptor: if it is already non-inheritable, set *atomic_flag_works
      to 1 and do nothing else, otherwise set *atomic_flag_works to 0 and
      make the file descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
739
/* Open a file with the specified flags (wrapper to open() function).
   The file descriptor is created non-inheritable.

   Return the new file descriptor, or a negative value on error. No exception
   is raised. If the descriptor cannot be made non-inheritable it is closed,
   and errno is left as it was before the close() call. */
int
_Py_open(const char *pathname, int flags)
{
    int fd;
#ifdef MS_WINDOWS
    fd = open(pathname, flags | O_NOINHERIT);
    if (fd < 0)
        return fd;
#else

    int *atomic_flag_works;
#ifdef O_CLOEXEC
    /* ask open() for close-on-exec; set_inheritable() checks on first use
       whether the flag was honoured */
    atomic_flag_works = &_Py_open_cloexec_works;
    flags |= O_CLOEXEC;
#else
    atomic_flag_works = NULL;
#endif
    fd = open(pathname, flags);
    if (fd < 0)
        return fd;

    if (set_inheritable(fd, 0, 0, atomic_flag_works) < 0) {
        /* close() may modify errno: report the original failure */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }
#endif   /* !MS_WINDOWS */
    return fd;
}
770
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise. The file descriptor is created
   non-inheritable.

   Return NULL on error. errno is set to EINVAL if the mode or the path
   cannot be encoded; the errno value of a failed fopen() is preserved
   across the release of the temporary encoded path. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
    int saved_errno;
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        /* mode is not encodable or does not fit with its NUL byte */
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* same convention as the other wide character wrappers */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    /* releasing memory may modify errno: keep the value set by fopen() */
    saved_errno = errno;
    PyMem_Free(cpath);
    errno = saved_errno;
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may modify errno: report the original failure */
        saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
803
/* Wrapper to fopen(). The file descriptor is created non-inheritable.

   Return NULL on error. If the descriptor cannot be made non-inheritable the
   stream is closed, and errno is left as it was before the fclose() call. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *f = fopen(pathname, mode);
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may modify errno: report the original failure */
        int saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
817
818/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
819 encoding and call fopen() otherwise. The file descriptor is created
820 non-inheritable.
Victor Stinner6672d0c2010-10-07 22:53:43 +0000821
822 Return the new file object on success, or NULL if the file cannot be open or
Victor Stinnerdaf45552013-08-28 00:53:59 +0200823 (if PyErr_Occurred()) on unicode error. */
Victor Stinner4e314432010-10-07 21:45:39 +0000824FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +0200825_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000826{
Victor Stinnerdaf45552013-08-28 00:53:59 +0200827 FILE *f;
Victor Stinner4e314432010-10-07 21:45:39 +0000828#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100829 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000830 wchar_t wmode[10];
831 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000832
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100833 if (!PyUnicode_Check(path)) {
834 PyErr_Format(PyExc_TypeError,
835 "str file path expected under Windows, got %R",
836 Py_TYPE(path));
837 return NULL;
838 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100839 wpath = PyUnicode_AsUnicode(path);
840 if (wpath == NULL)
841 return NULL;
842
Victor Stinner4e314432010-10-07 21:45:39 +0000843 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
844 if (usize == 0)
845 return NULL;
846
Victor Stinnerdaf45552013-08-28 00:53:59 +0200847 f = _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000848#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100849 PyObject *bytes;
850 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000851 return NULL;
852 f = fopen(PyBytes_AS_STRING(bytes), mode);
853 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +0000854#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +0200855 if (f == NULL)
856 return NULL;
857 if (make_non_inheritable(fileno(f)) < 0) {
858 fclose(f);
859 return NULL;
860 }
861 return f;
Victor Stinner4e314432010-10-07 21:45:39 +0000862}
863
864#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +0000865
866/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100867 the result from the locale encoding. Return -1 on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000868
Victor Stinner4e314432010-10-07 21:45:39 +0000869int
870_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
871{
872 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100873 char cbuf[MAXPATHLEN];
Victor Stinner3f711f42010-10-16 22:47:37 +0000874 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +0000875 int res;
876 size_t r1;
877
Victor Stinner2f02a512010-11-08 22:43:46 +0000878 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000879 if (cpath == NULL) {
880 errno = EINVAL;
881 return -1;
882 }
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100883 res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
Victor Stinner4e314432010-10-07 21:45:39 +0000884 PyMem_Free(cpath);
885 if (res == -1)
886 return -1;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100887 if (res == Py_ARRAY_LENGTH(cbuf)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000888 errno = EINVAL;
889 return -1;
890 }
891 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner168e1172010-10-16 23:16:16 +0000892 wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +0000893 if (wbuf == NULL) {
894 errno = EINVAL;
895 return -1;
896 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000897 if (bufsiz <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200898 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000899 errno = EINVAL;
900 return -1;
901 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000902 wcsncpy(buf, wbuf, bufsiz);
Victor Stinner1a7425f2013-07-07 16:25:15 +0200903 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000904 return (int)r1;
905}
906#endif
907
908#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +0000909
910/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100911 encoding, decode the result from the locale encoding.
912 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000913
Victor Stinner4e314432010-10-07 21:45:39 +0000914wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +0000915_Py_wrealpath(const wchar_t *path,
916 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +0000917{
918 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100919 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000920 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +0000921 char *res;
922 size_t r;
Victor Stinner2f02a512010-11-08 22:43:46 +0000923 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000924 if (cpath == NULL) {
925 errno = EINVAL;
926 return NULL;
927 }
928 res = realpath(cpath, cresolved_path);
929 PyMem_Free(cpath);
930 if (res == NULL)
931 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000932
Victor Stinner168e1172010-10-16 23:16:16 +0000933 wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000934 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000935 errno = EINVAL;
936 return NULL;
937 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000938 if (resolved_path_size <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200939 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000940 errno = EINVAL;
941 return NULL;
942 }
943 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
Victor Stinner1a7425f2013-07-07 16:25:15 +0200944 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +0000945 return resolved_path;
946}
947#endif
948
Victor Stinnerf4061da2010-10-14 12:37:19 +0000949/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100950 including the null character. Decode the path from the locale encoding.
951 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000952
Victor Stinner4e314432010-10-07 21:45:39 +0000953wchar_t*
954_Py_wgetcwd(wchar_t *buf, size_t size)
955{
956#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +0200957 int isize = (int)Py_MIN(size, INT_MAX);
958 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +0000959#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100960 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000961 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000962 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000963
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100964 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +0000965 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000966 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000967 if (wname == NULL)
968 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000969 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200970 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000971 return NULL;
972 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000973 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +0200974 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000975 return buf;
976#endif
977}
978
Victor Stinnerdaf45552013-08-28 00:53:59 +0200979/* Duplicate a file descriptor. The new file descriptor is created as
980 non-inheritable. Return a new file descriptor on success, raise an OSError
981 exception and return -1 on error.
982
983 The GIL is released to call dup(). The caller must hold the GIL. */
984int
985_Py_dup(int fd)
986{
987#ifdef MS_WINDOWS
988 HANDLE handle;
989 DWORD ftype;
990#endif
991
992 if (!_PyVerify_fd(fd)) {
993 PyErr_SetFromErrno(PyExc_OSError);
994 return -1;
995 }
996
997#ifdef MS_WINDOWS
998 handle = (HANDLE)_get_osfhandle(fd);
999 if (handle == INVALID_HANDLE_VALUE) {
1000 PyErr_SetFromWindowsErr(0);
1001 return -1;
1002 }
1003
1004 /* get the file type, ignore the error if it failed */
1005 ftype = GetFileType(handle);
1006
1007 Py_BEGIN_ALLOW_THREADS
1008 fd = dup(fd);
1009 Py_END_ALLOW_THREADS
1010 if (fd < 0) {
1011 PyErr_SetFromErrno(PyExc_OSError);
1012 return -1;
1013 }
1014
1015 /* Character files like console cannot be make non-inheritable */
1016 if (ftype != FILE_TYPE_CHAR) {
1017 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1018 close(fd);
1019 return -1;
1020 }
1021 }
1022#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1023 Py_BEGIN_ALLOW_THREADS
1024 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1025 Py_END_ALLOW_THREADS
1026 if (fd < 0) {
1027 PyErr_SetFromErrno(PyExc_OSError);
1028 return -1;
1029 }
1030
1031#else
1032 Py_BEGIN_ALLOW_THREADS
1033 fd = dup(fd);
1034 Py_END_ALLOW_THREADS
1035 if (fd < 0) {
1036 PyErr_SetFromErrno(PyExc_OSError);
1037 return -1;
1038 }
1039
1040 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1041 close(fd);
1042 return -1;
1043 }
1044#endif
1045 return fd;
1046}
1047
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001048#ifndef MS_WINDOWS
1049/* Get the blocking mode of the file descriptor.
1050 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1051 raise an exception and return -1 on error. */
1052int
1053_Py_get_blocking(int fd)
1054{
1055 int flags = fcntl(fd, F_GETFL, 0);
1056 if (flags < 0) {
1057 PyErr_SetFromErrno(PyExc_OSError);
1058 return -1;
1059 }
1060
1061 return !(flags & O_NONBLOCK);
1062}
1063
1064/* Set the blocking mode of the specified file descriptor.
1065
1066 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1067 otherwise.
1068
1069 Return 0 on success, raise an exception and return -1 on error. */
1070int
1071_Py_set_blocking(int fd, int blocking)
1072{
1073#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1074 int arg = !blocking;
1075 if (ioctl(fd, FIONBIO, &arg) < 0)
1076 goto error;
1077#else
1078 int flags, res;
1079
1080 flags = fcntl(fd, F_GETFL, 0);
1081 if (flags < 0)
1082 goto error;
1083
1084 if (blocking)
1085 flags = flags & (~O_NONBLOCK);
1086 else
1087 flags = flags | O_NONBLOCK;
1088
1089 res = fcntl(fd, F_SETFL, flags);
1090 if (res < 0)
1091 goto error;
1092#endif
1093 return 0;
1094
1095error:
1096 PyErr_SetFromErrno(PyExc_OSError);
1097 return -1;
1098}
1099#endif
1100