blob: b504b15671671bb3d31dd01729d147bdaf29731e [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Victor Stinnerb306d752010-10-07 22:09:40 +00003#ifdef MS_WINDOWS
4# include <windows.h>
5#endif
Victor Stinner4e314432010-10-07 21:45:39 +00006
Brett Cannonefb00c02012-02-29 18:31:31 -05007#ifdef HAVE_LANGINFO_H
Victor Stinnerd45c7f82012-12-04 01:34:47 +01008#include <locale.h>
Brett Cannonefb00c02012-02-29 18:31:31 -05009#include <langinfo.h>
10#endif
11
Victor Stinnerdaf45552013-08-28 00:53:59 +020012#ifdef HAVE_SYS_IOCTL_H
13#include <sys/ioctl.h>
14#endif
15
16#ifdef HAVE_FCNTL_H
17#include <fcntl.h>
18#endif /* HAVE_FCNTL_H */
19
Victor Stinnere2623772012-11-12 23:04:02 +010020#ifdef __APPLE__
21extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
22#endif
23
#ifdef O_CLOEXEC
/* Tri-state cache answering "does open() honour the O_CLOEXEC flag?":

     -1: not determined yet
      0: open() ignores the O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
      1: open() honours the O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), io.FileIO and os.open() */
int _Py_open_cloexec_works = -1;
#endif
34
Brett Cannonefb00c02012-02-29 18:31:31 -050035PyObject *
36_Py_device_encoding(int fd)
37{
Victor Stinner14b9b112013-06-25 00:37:25 +020038#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050039 UINT cp;
40#endif
41 if (!_PyVerify_fd(fd) || !isatty(fd)) {
42 Py_RETURN_NONE;
43 }
Victor Stinner14b9b112013-06-25 00:37:25 +020044#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050045 if (fd == 0)
46 cp = GetConsoleCP();
47 else if (fd == 1 || fd == 2)
48 cp = GetConsoleOutputCP();
49 else
50 cp = 0;
51 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
52 has no console */
53 if (cp != 0)
54 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
55#elif defined(CODESET)
56 {
57 char *codeset = nl_langinfo(CODESET);
58 if (codeset != NULL && codeset[0] != 0)
59 return PyUnicode_FromString(codeset);
60 }
61#endif
62 Py_RETURN_NONE;
63}
64
Victor Stinnerd45c7f82012-12-04 01:34:47 +010065#if !defined(__APPLE__) && !defined(MS_WINDOWS)
extern int _Py_normalize_encoding(const char *, char *, size_t);

/* Workaround for a FreeBSD and OpenIndiana locale encoding issue with the
   C locale.

   On these operating systems, nl_langinfo(CODESET) announces an alias of
   the ASCII encoding, whereas the mbstowcs() and wcstombs() functions use
   the ISO-8859-1 encoding.  The problem is that os.fsencode() and
   os.fsdecode() use the locale.getpreferredencoding() codec.  For example,
   if command line arguments are decoded by mbstowcs() and encoded back by
   os.fsencode(), we get a UnicodeEncodeError instead of retrieving the
   original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at
   least one byte in range 0x80-0xff can be decoded from the locale
   encoding.  The workaround is also enabled on error, for example if
   getting the locale failed.

   Values of force_ascii:

      1: the workaround is used: _Py_wchar2char() uses
         encode_ascii_surrogateescape() and _Py_char2wchar() uses
         decode_ascii_surrogateescape()
      0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
         _Py_char2wchar() uses mbstowcs()
     -1: unknown, check_force_ascii() must be called to get the value
*/
static int force_ascii = -1;
92
/* Decide whether the ASCII workaround must be used for the current locale.

   Return 0 when the workaround is not needed:
     - the LC_CTYPE locale is not "C", or
     - nl_langinfo(CODESET) does not announce ASCII (or one of its aliases),
       or
     - no byte in the range 0x80-0xff can be decoded by mbstowcs(), i.e. the
       locale encoding really is ASCII.

   Return 1 (force ASCII) when at least one byte in the range 0x80-0xff can
   be decoded although ASCII is announced, when nl_langinfo(CODESET) is not
   available, or when any error occurs while inspecting the locale. */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    char *codeset;
    const char * const *alias;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    /* Normalized names under which ASCII may be announced */
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included */
    for (i = 0x80; i <= 0xff; i++) {
        char ch[2];
        wchar_t wch;
        size_t res;

        /* mbstowcs() expects a null-terminated string: terminate the
           one-byte probe so that it never reads past the buffer */
        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the
       locale encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
174
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters U+0000..U+007F are copied as a single byte and
   characters U+DC80..U+DCFF are converted to the bytes 0x80..0xFF.

   Return a newly allocated null-terminated byte string (allocated with
   PyMem_Malloc()), or NULL on error.

   If error_pos is not NULL, *error_pos is set to the index of the first
   character that cannot be encoded, or to (size_t)-1 when there is no such
   character (success or memory allocation failure). */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    size_t pos;
    char *buffer;

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    /* every input character produces exactly one byte; +1 for NUL byte */
    buffer = PyMem_Malloc(nchars + 1);
    if (buffer == NULL)
        return NULL;

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (wc >= 0xdc80 && wc <= 0xdcff) {
            /* UTF-8b surrogate */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* character that cannot be encoded: report its index */
        if (error_pos != NULL)
            *error_pos = pos;
        PyMem_Free(buffer);
        return NULL;
    }
    buffer[nchars] = '\0';
    return buffer;
}
213#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
214
#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
/* Decode a null-terminated byte string as ASCII with the surrogateescape
   error handler: bytes 0x00..0x7F are copied unchanged and bytes
   0x80..0xFF are decoded as the characters U+DC80..U+DCFF.

   Return a newly allocated null-terminated wide character string
   (allocated with PyMem_RawMalloc()) and, if size is not NULL, store the
   number of wide characters excluding the null character into *size.

   Return NULL if the buffer cannot be allocated or if its size in bytes
   cannot be represented in a size_t; *size is left untouched in that
   case. */
static wchar_t*
decode_ascii_surrogateescape(const char *arg, size_t *size)
{
    const unsigned char *in = (const unsigned char *)arg;
    const size_t len = strlen(arg);
    wchar_t *res;
    wchar_t *out;

    /* len+1 wide characters are needed: reject a length for which
       (len+1)*sizeof(wchar_t) would wrap around */
    if (len >= (size_t)-1 / sizeof(wchar_t))
        return NULL;

    res = PyMem_RawMalloc((len + 1) * sizeof(wchar_t));
    if (!res)
        return NULL;

    out = res;
    while (*in) {
        if (*in < 128) {
            *out++ = *in++;
        }
        else {
            *out++ = 0xdc00 + *in++;
        }
    }
    *out = 0;
    if (size != NULL)
        *size = out - res;
    return res;
}
#endif
240
Victor Stinner4e314432010-10-07 21:45:39 +0000241
242/* Decode a byte string from the locale encoding with the
243 surrogateescape error handler (undecodable bytes are decoded as characters
244 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
245 character, escape the bytes using the surrogateescape error handler instead
246 of decoding them.
247
248 Use _Py_wchar2char() to encode the character string back to a byte string.
249
Victor Stinner168e1172010-10-16 23:16:16 +0000250 Return a pointer to a newly allocated wide character string (use
Victor Stinner1a7425f2013-07-07 16:25:15 +0200251 PyMem_RawFree() to free the memory) and write the number of written wide
Victor Stinner168e1172010-10-16 23:16:16 +0000252 characters excluding the null character into *size if size is not NULL, or
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100253 NULL on error (decoding or memory allocation error). If size is not NULL,
254 *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
255 error.
Victor Stinner19de4c32010-11-08 23:30:46 +0000256
257 Conversion errors should never happen, unless there is a bug in the C
258 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000259wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000260_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000261{
Victor Stinnere2623772012-11-12 23:04:02 +0100262#ifdef __APPLE__
263 wchar_t *wstr;
264 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100265 if (size != NULL) {
266 if (wstr != NULL)
267 *size = wcslen(wstr);
268 else
269 *size = (size_t)-1;
270 }
Victor Stinnere2623772012-11-12 23:04:02 +0100271 return wstr;
272#else
Victor Stinner4e314432010-10-07 21:45:39 +0000273 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100274 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000275 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200276#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000277 unsigned char *in;
278 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000279 mbstate_t mbs;
280#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100281
282#ifndef MS_WINDOWS
283 if (force_ascii == -1)
284 force_ascii = check_force_ascii();
285
286 if (force_ascii) {
287 /* force ASCII encoding to workaround mbstowcs() issue */
288 res = decode_ascii_surrogateescape(arg, size);
289 if (res == NULL)
290 goto oom;
291 return res;
292 }
293#endif
294
295#ifdef HAVE_BROKEN_MBSTOWCS
296 /* Some platforms have a broken implementation of
297 * mbstowcs which does not count the characters that
298 * would result from conversion. Use an upper bound.
299 */
300 argsize = strlen(arg);
301#else
302 argsize = mbstowcs(NULL, arg, 0);
303#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000304 if (argsize != (size_t)-1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200305 res = (wchar_t *)PyMem_RawMalloc((argsize+1)*sizeof(wchar_t));
Victor Stinner4e314432010-10-07 21:45:39 +0000306 if (!res)
307 goto oom;
308 count = mbstowcs(res, arg, argsize+1);
309 if (count != (size_t)-1) {
310 wchar_t *tmp;
311 /* Only use the result if it contains no
312 surrogate characters. */
313 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100314 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000315 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000316 if (*tmp == 0) {
317 if (size != NULL)
318 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000319 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000320 }
Victor Stinner4e314432010-10-07 21:45:39 +0000321 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200322 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000323 }
324 /* Conversion failed. Fall back to escaping with surrogateescape. */
325#ifdef HAVE_MBRTOWC
326 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
327
328 /* Overallocate; as multi-byte characters are in the argument, the
329 actual output could use less memory. */
330 argsize = strlen(arg) + 1;
Victor Stinner1a7425f2013-07-07 16:25:15 +0200331 res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000332 if (!res)
333 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000334 in = (unsigned char*)arg;
335 out = res;
336 memset(&mbs, 0, sizeof mbs);
337 while (argsize) {
338 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
339 if (converted == 0)
340 /* Reached end of string; null char stored. */
341 break;
342 if (converted == (size_t)-2) {
343 /* Incomplete character. This should never happen,
344 since we provide everything that we have -
345 unless there is a bug in the C library, or I
346 misunderstood how mbrtowc works. */
Victor Stinner1a7425f2013-07-07 16:25:15 +0200347 PyMem_RawFree(res);
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100348 if (size != NULL)
349 *size = (size_t)-2;
Victor Stinner4e314432010-10-07 21:45:39 +0000350 return NULL;
351 }
352 if (converted == (size_t)-1) {
353 /* Conversion error. Escape as UTF-8b, and start over
354 in the initial shift state. */
355 *out++ = 0xdc00 + *in++;
356 argsize--;
357 memset(&mbs, 0, sizeof mbs);
358 continue;
359 }
Victor Stinner76df43d2012-10-30 01:42:39 +0100360 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000361 /* Surrogate character. Escape the original
362 byte sequence with surrogateescape. */
363 argsize -= converted;
364 while (converted--)
365 *out++ = 0xdc00 + *in++;
366 continue;
367 }
368 /* successfully converted some bytes */
369 in += converted;
370 argsize -= converted;
371 out++;
372 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100373 if (size != NULL)
374 *size = out - res;
Victor Stinnere2623772012-11-12 23:04:02 +0100375#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000376 /* Cannot use C locale for escaping; manually escape as if charset
377 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
378 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100379 res = decode_ascii_surrogateescape(arg, size);
380 if (res == NULL)
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100381 goto oom;
Victor Stinnere2623772012-11-12 23:04:02 +0100382#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000383 return res;
384oom:
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100385 if (size != NULL)
386 *size = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000387 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100388#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000389}
390
391/* Encode a (wide) character string to the locale encoding with the
392 surrogateescape error handler (characters in range U+DC80..U+DCFF are
393 converted to bytes 0x80..0xFF).
394
395 This function is the reverse of _Py_char2wchar().
396
397 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100398 the memory), or NULL on encoding or memory allocation error.
Victor Stinner2f02a512010-11-08 22:43:46 +0000399
400 If error_pos is not NULL: *error_pos is the index of the invalid character
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100401 on encoding error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000402char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000403_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000404{
Victor Stinnere2623772012-11-12 23:04:02 +0100405#ifdef __APPLE__
406 Py_ssize_t len;
407 PyObject *unicode, *bytes = NULL;
408 char *cpath;
409
410 unicode = PyUnicode_FromWideChar(text, wcslen(text));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100411 if (unicode == NULL)
Victor Stinnere2623772012-11-12 23:04:02 +0100412 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100413
414 bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
415 Py_DECREF(unicode);
416 if (bytes == NULL) {
417 PyErr_Clear();
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100418 if (error_pos != NULL)
419 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100420 return NULL;
421 }
422
423 len = PyBytes_GET_SIZE(bytes);
424 cpath = PyMem_Malloc(len+1);
425 if (cpath == NULL) {
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100426 PyErr_Clear();
Victor Stinnere2623772012-11-12 23:04:02 +0100427 Py_DECREF(bytes);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100428 if (error_pos != NULL)
429 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100430 return NULL;
431 }
432 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
433 Py_DECREF(bytes);
434 return cpath;
435#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000436 const size_t len = wcslen(text);
437 char *result = NULL, *bytes = NULL;
438 size_t i, size, converted;
439 wchar_t c, buf[2];
440
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100441#ifndef MS_WINDOWS
442 if (force_ascii == -1)
443 force_ascii = check_force_ascii();
444
445 if (force_ascii)
446 return encode_ascii_surrogateescape(text, error_pos);
447#endif
448
Victor Stinner4e314432010-10-07 21:45:39 +0000449 /* The function works in two steps:
450 1. compute the length of the output buffer in bytes (size)
451 2. outputs the bytes */
452 size = 0;
453 buf[1] = 0;
454 while (1) {
455 for (i=0; i < len; i++) {
456 c = text[i];
457 if (c >= 0xdc80 && c <= 0xdcff) {
458 /* UTF-8b surrogate */
459 if (bytes != NULL) {
460 *bytes++ = c - 0xdc00;
461 size--;
462 }
463 else
464 size++;
465 continue;
466 }
467 else {
468 buf[0] = c;
469 if (bytes != NULL)
470 converted = wcstombs(bytes, buf, size);
471 else
472 converted = wcstombs(NULL, buf, 0);
473 if (converted == (size_t)-1) {
474 if (result != NULL)
475 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000476 if (error_pos != NULL)
477 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000478 return NULL;
479 }
480 if (bytes != NULL) {
481 bytes += converted;
482 size -= converted;
483 }
484 else
485 size += converted;
486 }
487 }
488 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100489 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000490 break;
491 }
492
493 size += 1; /* nul byte at the end */
494 result = PyMem_Malloc(size);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100495 if (result == NULL) {
496 if (error_pos != NULL)
497 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000498 return NULL;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100499 }
Victor Stinner4e314432010-10-07 21:45:39 +0000500 bytes = result;
501 }
502 return result;
Victor Stinnere2623772012-11-12 23:04:02 +0100503#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000504}
505
Victor Stinner4e314432010-10-07 21:45:39 +0000506/* In principle, this should use HAVE__WSTAT, and _wstat
507 should be detected by autoconf. However, no current
508 POSIX system provides that function, so testing for
509 it is pointless.
510 Not sure whether the MS_WINDOWS guards are necessary:
511 perhaps for cygwin/mingw builds?
512*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000513#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000514
/* Get file status. The path is encoded to the locale encoding with
   _Py_wchar2char() before calling stat().

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded != NULL) {
        status = stat(encoded, buf);
        PyMem_Free(encoded);
    }
    else {
        errno = EINVAL;
        status = -1;
    }
    return status;
}
531#endif
532
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100533#ifdef HAVE_STAT
534
Victor Stinner6672d0c2010-10-07 22:53:43 +0000535/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
536 call stat() otherwise. Only fill st_mode attribute on Windows.
537
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100538 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
539 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000540
541int
Victor Stinnera4a75952010-10-07 22:23:10 +0000542_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000543{
544#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000545 int err;
546 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100547 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000548
Victor Stinneree587ea2011-11-17 00:51:38 +0100549 wpath = PyUnicode_AsUnicode(path);
550 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100551 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100552 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000553 if (!err)
554 statbuf->st_mode = wstatbuf.st_mode;
555 return err;
556#else
557 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000558 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000559 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100560 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000561 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
562 Py_DECREF(bytes);
563 return ret;
564#endif
565}
566
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100567#endif
568
Antoine Pitrou409b5382013-10-12 22:41:17 +0200569static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200570get_inheritable(int fd, int raise)
571{
572#ifdef MS_WINDOWS
573 HANDLE handle;
574 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000575
Victor Stinnerdaf45552013-08-28 00:53:59 +0200576 if (!_PyVerify_fd(fd)) {
577 if (raise)
578 PyErr_SetFromErrno(PyExc_OSError);
579 return -1;
580 }
581
582 handle = (HANDLE)_get_osfhandle(fd);
583 if (handle == INVALID_HANDLE_VALUE) {
584 if (raise)
585 PyErr_SetFromWindowsErr(0);
586 return -1;
587 }
588
589 if (!GetHandleInformation(handle, &flags)) {
590 if (raise)
591 PyErr_SetFromWindowsErr(0);
592 return -1;
593 }
594
595 return (flags & HANDLE_FLAG_INHERIT);
596#else
597 int flags;
598
599 flags = fcntl(fd, F_GETFD, 0);
600 if (flags == -1) {
601 if (raise)
602 PyErr_SetFromErrno(PyExc_OSError);
603 return -1;
604 }
605 return !(flags & FD_CLOEXEC);
606#endif
607}
608
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    /* public entry point: errors are always reported as exceptions */
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
617
618static int
619set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
620{
621#ifdef MS_WINDOWS
622 HANDLE handle;
623 DWORD flags;
624#elif defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
625 int request;
626 int err;
627#elif defined(HAVE_FCNTL_H)
628 int flags;
629 int res;
630#endif
631
632 /* atomic_flag_works can only be used to make the file descriptor
633 non-inheritable */
634 assert(!(atomic_flag_works != NULL && inheritable));
635
636 if (atomic_flag_works != NULL && !inheritable) {
637 if (*atomic_flag_works == -1) {
638 int inheritable = get_inheritable(fd, raise);
639 if (inheritable == -1)
640 return -1;
641 *atomic_flag_works = !inheritable;
642 }
643
644 if (*atomic_flag_works)
645 return 0;
646 }
647
648#ifdef MS_WINDOWS
649 if (!_PyVerify_fd(fd)) {
650 if (raise)
651 PyErr_SetFromErrno(PyExc_OSError);
652 return -1;
653 }
654
655 handle = (HANDLE)_get_osfhandle(fd);
656 if (handle == INVALID_HANDLE_VALUE) {
657 if (raise)
658 PyErr_SetFromWindowsErr(0);
659 return -1;
660 }
661
662 if (inheritable)
663 flags = HANDLE_FLAG_INHERIT;
664 else
665 flags = 0;
666 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
667 if (raise)
668 PyErr_SetFromWindowsErr(0);
669 return -1;
670 }
671 return 0;
672
673#elif defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
674 if (inheritable)
675 request = FIONCLEX;
676 else
677 request = FIOCLEX;
Stefan Krah49d04792013-11-14 15:35:47 +0100678 err = ioctl(fd, request, NULL);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200679 if (err) {
680 if (raise)
681 PyErr_SetFromErrno(PyExc_OSError);
682 return -1;
683 }
684 return 0;
685
686#else
687 flags = fcntl(fd, F_GETFD);
688 if (flags < 0) {
689 if (raise)
690 PyErr_SetFromErrno(PyExc_OSError);
691 return -1;
692 }
693
694 if (inheritable)
695 flags &= ~FD_CLOEXEC;
696 else
697 flags |= FD_CLOEXEC;
698 res = fcntl(fd, F_SETFD, flags);
699 if (res < 0) {
700 if (raise)
701 PyErr_SetFromErrno(PyExc_OSError);
702 return -1;
703 }
704 return 0;
705#endif
706}
707
/* Make the file descriptor non-inheritable.
   Return 0 on success, -1 on error. No exception is raised: this calls
   set_inheritable() with raise=0 and without an atomic flag cache. */
static int
make_non_inheritable(int fd)
{
    const int status = set_inheritable(fd, 0, 0, NULL);

    return status;
}
715
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the current inheritable flag of the
      file descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else, otherwise set it to 0
      and make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    /* public entry point: errors are always reported as exceptions */
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
738
/* Open a file with the specified flags (wrapper to open() function).
   The file descriptor is created non-inheritable.

   Return the new file descriptor, or a negative value on error. No
   exception is raised. If the descriptor was opened but could not be made
   non-inheritable, it is closed and errno keeps the value it had before
   close() was called. */
int
_Py_open(const char *pathname, int flags)
{
    int fd;
#ifdef MS_WINDOWS
    fd = open(pathname, flags | O_NOINHERIT);
    if (fd < 0)
        return fd;
#else

    int *atomic_flag_works;
#ifdef O_CLOEXEC
    /* ask open() to set close-on-exec atomically; the shared cache records
       whether open() really honours the flag */
    atomic_flag_works = &_Py_open_cloexec_works;
    flags |= O_CLOEXEC;
#else
    atomic_flag_works = NULL;
#endif
    fd = open(pathname, flags);
    if (fd < 0)
        return fd;

    if (set_inheritable(fd, 0, 0, atomic_flag_works) < 0) {
        /* close() may overwrite errno: keep the original error code */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }
#endif   /* !MS_WINDOWS */
    return fd;
}
769
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise. The file descriptor is created
   non-inheritable.

   Return the new stream, or NULL on error. On non-Windows platforms errno
   is set to EINVAL if the mode or the path cannot be encoded. If the file
   was opened but could not be made non-inheritable, it is closed and errno
   keeps the value it had before fclose() was called. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
    int saved_errno;
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        /* mode cannot be encoded, or does not fit with its null byte */
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* same convention as _Py_wstat() and _Py_wreadlink() */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    /* releasing the path must not hide the error code of fopen() */
    saved_errno = errno;
    PyMem_Free(cpath);
    errno = saved_errno;
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: keep the original error code */
        saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
802
/* Wrapper to fopen(). The file descriptor is created non-inheritable.

   Return the new stream, or NULL on error. If the file was opened but
   could not be made non-inheritable, it is closed and errno keeps the
   value it had before fclose() was called. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *f = fopen(pathname, mode);
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: keep the original error code */
        int saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
816
817/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
818 encoding and call fopen() otherwise. The file descriptor is created
819 non-inheritable.
Victor Stinner6672d0c2010-10-07 22:53:43 +0000820
821 Return the new file object on success, or NULL if the file cannot be open or
Victor Stinnerdaf45552013-08-28 00:53:59 +0200822 (if PyErr_Occurred()) on unicode error. */
Victor Stinner4e314432010-10-07 21:45:39 +0000823FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +0200824_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000825{
Victor Stinnerdaf45552013-08-28 00:53:59 +0200826 FILE *f;
Victor Stinner4e314432010-10-07 21:45:39 +0000827#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100828 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000829 wchar_t wmode[10];
830 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000831
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100832 if (!PyUnicode_Check(path)) {
833 PyErr_Format(PyExc_TypeError,
834 "str file path expected under Windows, got %R",
835 Py_TYPE(path));
836 return NULL;
837 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100838 wpath = PyUnicode_AsUnicode(path);
839 if (wpath == NULL)
840 return NULL;
841
Victor Stinner4e314432010-10-07 21:45:39 +0000842 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
843 if (usize == 0)
844 return NULL;
845
Victor Stinnerdaf45552013-08-28 00:53:59 +0200846 f = _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000847#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100848 PyObject *bytes;
849 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000850 return NULL;
851 f = fopen(PyBytes_AS_STRING(bytes), mode);
852 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +0000853#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +0200854 if (f == NULL)
855 return NULL;
856 if (make_non_inheritable(fileno(f)) < 0) {
857 fclose(f);
858 return NULL;
859 }
860 return f;
Victor Stinner4e314432010-10-07 21:45:39 +0000861}
862
863#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +0000864
865/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100866 the result from the locale encoding. Return -1 on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000867
Victor Stinner4e314432010-10-07 21:45:39 +0000868int
869_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
870{
871 char *cpath;
872 char cbuf[PATH_MAX];
Victor Stinner3f711f42010-10-16 22:47:37 +0000873 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +0000874 int res;
875 size_t r1;
876
Victor Stinner2f02a512010-11-08 22:43:46 +0000877 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000878 if (cpath == NULL) {
879 errno = EINVAL;
880 return -1;
881 }
882 res = (int)readlink(cpath, cbuf, PATH_MAX);
883 PyMem_Free(cpath);
884 if (res == -1)
885 return -1;
886 if (res == PATH_MAX) {
887 errno = EINVAL;
888 return -1;
889 }
890 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner168e1172010-10-16 23:16:16 +0000891 wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +0000892 if (wbuf == NULL) {
893 errno = EINVAL;
894 return -1;
895 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000896 if (bufsiz <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200897 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000898 errno = EINVAL;
899 return -1;
900 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000901 wcsncpy(buf, wbuf, bufsiz);
Victor Stinner1a7425f2013-07-07 16:25:15 +0200902 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000903 return (int)r1;
904}
905#endif
906
907#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +0000908
909/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100910 encoding, decode the result from the locale encoding.
911 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000912
Victor Stinner4e314432010-10-07 21:45:39 +0000913wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +0000914_Py_wrealpath(const wchar_t *path,
915 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +0000916{
917 char *cpath;
918 char cresolved_path[PATH_MAX];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000919 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +0000920 char *res;
921 size_t r;
Victor Stinner2f02a512010-11-08 22:43:46 +0000922 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000923 if (cpath == NULL) {
924 errno = EINVAL;
925 return NULL;
926 }
927 res = realpath(cpath, cresolved_path);
928 PyMem_Free(cpath);
929 if (res == NULL)
930 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000931
Victor Stinner168e1172010-10-16 23:16:16 +0000932 wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000933 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000934 errno = EINVAL;
935 return NULL;
936 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000937 if (resolved_path_size <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200938 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000939 errno = EINVAL;
940 return NULL;
941 }
942 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
Victor Stinner1a7425f2013-07-07 16:25:15 +0200943 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +0000944 return resolved_path;
945}
946#endif
947
Victor Stinnerf4061da2010-10-14 12:37:19 +0000948/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100949 including the null character. Decode the path from the locale encoding.
950 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000951
Victor Stinner4e314432010-10-07 21:45:39 +0000952wchar_t*
953_Py_wgetcwd(wchar_t *buf, size_t size)
954{
955#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +0200956 int isize = (int)Py_MIN(size, INT_MAX);
957 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +0000958#else
959 char fname[PATH_MAX];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000960 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000961 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000962
Victor Stinner4e314432010-10-07 21:45:39 +0000963 if (getcwd(fname, PATH_MAX) == NULL)
964 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000965 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000966 if (wname == NULL)
967 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000968 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +0200969 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000970 return NULL;
971 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000972 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +0200973 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000974 return buf;
975#endif
976}
977
Victor Stinnerdaf45552013-08-28 00:53:59 +0200978/* Duplicate a file descriptor. The new file descriptor is created as
979 non-inheritable. Return a new file descriptor on success, raise an OSError
980 exception and return -1 on error.
981
982 The GIL is released to call dup(). The caller must hold the GIL. */
983int
984_Py_dup(int fd)
985{
986#ifdef MS_WINDOWS
987 HANDLE handle;
988 DWORD ftype;
989#endif
990
991 if (!_PyVerify_fd(fd)) {
992 PyErr_SetFromErrno(PyExc_OSError);
993 return -1;
994 }
995
996#ifdef MS_WINDOWS
997 handle = (HANDLE)_get_osfhandle(fd);
998 if (handle == INVALID_HANDLE_VALUE) {
999 PyErr_SetFromWindowsErr(0);
1000 return -1;
1001 }
1002
1003 /* get the file type, ignore the error if it failed */
1004 ftype = GetFileType(handle);
1005
1006 Py_BEGIN_ALLOW_THREADS
1007 fd = dup(fd);
1008 Py_END_ALLOW_THREADS
1009 if (fd < 0) {
1010 PyErr_SetFromErrno(PyExc_OSError);
1011 return -1;
1012 }
1013
1014 /* Character files like console cannot be make non-inheritable */
1015 if (ftype != FILE_TYPE_CHAR) {
1016 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1017 close(fd);
1018 return -1;
1019 }
1020 }
1021#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1022 Py_BEGIN_ALLOW_THREADS
1023 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1024 Py_END_ALLOW_THREADS
1025 if (fd < 0) {
1026 PyErr_SetFromErrno(PyExc_OSError);
1027 return -1;
1028 }
1029
1030#else
1031 Py_BEGIN_ALLOW_THREADS
1032 fd = dup(fd);
1033 Py_END_ALLOW_THREADS
1034 if (fd < 0) {
1035 PyErr_SetFromErrno(PyExc_OSError);
1036 return -1;
1037 }
1038
1039 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1040 close(fd);
1041 return -1;
1042 }
1043#endif
1044 return fd;
1045}
1046