blob: 6502823535152d94353daaf8f215b45aed73550b [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08006# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00007# include <windows.h>
8#endif
Victor Stinner4e314432010-10-07 21:45:39 +00009
Brett Cannonefb00c02012-02-29 18:31:31 -050010#ifdef HAVE_LANGINFO_H
11#include <langinfo.h>
12#endif
13
Victor Stinnerdaf45552013-08-28 00:53:59 +020014#ifdef HAVE_SYS_IOCTL_H
15#include <sys/ioctl.h>
16#endif
17
18#ifdef HAVE_FCNTL_H
19#include <fcntl.h>
20#endif /* HAVE_FCNTL_H */
21
Victor Stinnere2623772012-11-12 23:04:02 +010022#ifdef __APPLE__
23extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
24#endif
25
#ifdef O_CLOEXEC
/* Cached answer to the question "does open() honour the O_CLOEXEC flag?".

   Possible values:

     -1: not determined yet
      0: open() ignores O_CLOEXEC (for example a Linux kernel older
         than 2.6.23)
      1: open() supports O_CLOEXEC, so close-on-exec is set atomically

   The variable is consulted by _Py_open(), io.FileIO and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
36
Brett Cannonefb00c02012-02-29 18:31:31 -050037PyObject *
38_Py_device_encoding(int fd)
39{
Victor Stinner14b9b112013-06-25 00:37:25 +020040#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050041 UINT cp;
42#endif
43 if (!_PyVerify_fd(fd) || !isatty(fd)) {
44 Py_RETURN_NONE;
45 }
Victor Stinner14b9b112013-06-25 00:37:25 +020046#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050047 if (fd == 0)
48 cp = GetConsoleCP();
49 else if (fd == 1 || fd == 2)
50 cp = GetConsoleOutputCP();
51 else
52 cp = 0;
53 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
54 has no console */
55 if (cp != 0)
56 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
57#elif defined(CODESET)
58 {
59 char *codeset = nl_langinfo(CODESET);
60 if (codeset != NULL && codeset[0] != 0)
61 return PyUnicode_FromString(codeset);
62 }
63#endif
64 Py_RETURN_NONE;
65}
66
Victor Stinnerd45c7f82012-12-04 01:34:47 +010067#if !defined(__APPLE__) && !defined(MS_WINDOWS)
68extern int _Py_normalize_encoding(const char *, char *, size_t);
69
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
   On these operating systems, nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
          decode_ascii_surrogateescape()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;

/* Compute the value to store in force_ascii.

   Return 1 if the ASCII workaround must be used, 0 if the C library
   functions can be trusted. Any failure to query the locale also yields 1,
   so that the conservative ASCII code path is taken. */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    char *codeset, **alias;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    char* ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0x80 through 0xff inclusive. */
    for (i=0x80; i<=0xff; i++) {
        char ch[2];
        wchar_t wch;
        size_t res;

        /* Hand mbstowcs() a properly NUL-terminated string so that it can
           never read past the probe byte. */
        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
176
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters up to 0x7f are copied as is and characters in
   U+DC80..U+DCFF are mapped back to the bytes 0x80..0xff.

   Return a newly allocated, NUL-terminated byte string (release it with
   PyMem_Free()), or NULL on memory allocation failure or when a character
   cannot be encoded. If error_pos is not NULL, *error_pos receives the index
   of the first unencodable character, or (size_t)-1 in every other case. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    size_t nchars, pos;
    char *encoded;

    if (error_pos != NULL) {
        *error_pos = (size_t)-1;
    }

    nchars = wcslen(text);

    /* every input character produces exactly one byte, +1 for the NUL */
    encoded = PyMem_Malloc(nchars + 1);
    if (encoded == NULL) {
        return NULL;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            encoded[pos] = (char)wc;
            continue;
        }
        if (wc >= 0xdc80 && wc <= 0xdcff) {
            /* UTF-8b surrogate: recover the escaped byte */
            encoded[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* neither ASCII nor an escaped byte: report where encoding failed */
        if (error_pos != NULL) {
            *error_pos = pos;
        }
        PyMem_Free(encoded);
        return NULL;
    }
    encoded[nchars] = '\0';
    return encoded;
}
215#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
216
217#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
218static wchar_t*
219decode_ascii_surrogateescape(const char *arg, size_t *size)
220{
221 wchar_t *res;
222 unsigned char *in;
223 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600224 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100225
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600226 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
227 return NULL;
Benjamin Peterson10ecaa22015-01-04 16:05:39 -0600228 res = PyMem_RawMalloc(argsize*sizeof(wchar_t));
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100229 if (!res)
230 return NULL;
231
232 in = (unsigned char*)arg;
233 out = res;
234 while(*in)
235 if(*in < 128)
236 *out++ = *in++;
237 else
238 *out++ = 0xdc00 + *in++;
239 *out = 0;
240 if (size != NULL)
241 *size = out - res;
242 return res;
243}
244#endif
245
Victor Stinner4e314432010-10-07 21:45:39 +0000246
247/* Decode a byte string from the locale encoding with the
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200248 surrogateescape error handler: undecodable bytes are decoded as characters
249 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
Victor Stinner4e314432010-10-07 21:45:39 +0000250 character, escape the bytes using the surrogateescape error handler instead
251 of decoding them.
252
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200253 Return a pointer to a newly allocated wide character string, use
254 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
255 wide characters excluding the null character into *size
Victor Stinner4e314432010-10-07 21:45:39 +0000256
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200257 Return NULL on decoding error or memory allocation error. If *size* is not
258 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
259 decoding error.
Victor Stinner19de4c32010-11-08 23:30:46 +0000260
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200261 Decoding errors should never happen, unless there is a bug in the C
262 library.
263
264 Use the Py_EncodeLocale() function to encode the character string back to a
265 byte string. */
Victor Stinner4e314432010-10-07 21:45:39 +0000266wchar_t*
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200267Py_DecodeLocale(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000268{
Victor Stinnere2623772012-11-12 23:04:02 +0100269#ifdef __APPLE__
270 wchar_t *wstr;
271 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100272 if (size != NULL) {
273 if (wstr != NULL)
274 *size = wcslen(wstr);
275 else
276 *size = (size_t)-1;
277 }
Victor Stinnere2623772012-11-12 23:04:02 +0100278 return wstr;
279#else
Victor Stinner4e314432010-10-07 21:45:39 +0000280 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100281 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000282 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200283#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000284 unsigned char *in;
285 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000286 mbstate_t mbs;
287#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100288
289#ifndef MS_WINDOWS
290 if (force_ascii == -1)
291 force_ascii = check_force_ascii();
292
293 if (force_ascii) {
294 /* force ASCII encoding to workaround mbstowcs() issue */
295 res = decode_ascii_surrogateescape(arg, size);
296 if (res == NULL)
297 goto oom;
298 return res;
299 }
300#endif
301
302#ifdef HAVE_BROKEN_MBSTOWCS
303 /* Some platforms have a broken implementation of
304 * mbstowcs which does not count the characters that
305 * would result from conversion. Use an upper bound.
306 */
307 argsize = strlen(arg);
308#else
309 argsize = mbstowcs(NULL, arg, 0);
310#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000311 if (argsize != (size_t)-1) {
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600312 if (argsize == PY_SSIZE_T_MAX)
313 goto oom;
314 argsize += 1;
315 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
316 goto oom;
Benjamin Peterson10ecaa22015-01-04 16:05:39 -0600317 res = (wchar_t *)PyMem_RawMalloc(argsize*sizeof(wchar_t));
Victor Stinner4e314432010-10-07 21:45:39 +0000318 if (!res)
319 goto oom;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600320 count = mbstowcs(res, arg, argsize);
Victor Stinner4e314432010-10-07 21:45:39 +0000321 if (count != (size_t)-1) {
322 wchar_t *tmp;
323 /* Only use the result if it contains no
324 surrogate characters. */
325 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100326 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000327 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000328 if (*tmp == 0) {
329 if (size != NULL)
330 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000331 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000332 }
Victor Stinner4e314432010-10-07 21:45:39 +0000333 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200334 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000335 }
336 /* Conversion failed. Fall back to escaping with surrogateescape. */
337#ifdef HAVE_MBRTOWC
338 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
339
340 /* Overallocate; as multi-byte characters are in the argument, the
341 actual output could use less memory. */
342 argsize = strlen(arg) + 1;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600343 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
344 goto oom;
Victor Stinner1a7425f2013-07-07 16:25:15 +0200345 res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000346 if (!res)
347 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000348 in = (unsigned char*)arg;
349 out = res;
350 memset(&mbs, 0, sizeof mbs);
351 while (argsize) {
352 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
353 if (converted == 0)
354 /* Reached end of string; null char stored. */
355 break;
356 if (converted == (size_t)-2) {
357 /* Incomplete character. This should never happen,
358 since we provide everything that we have -
359 unless there is a bug in the C library, or I
360 misunderstood how mbrtowc works. */
Victor Stinner1a7425f2013-07-07 16:25:15 +0200361 PyMem_RawFree(res);
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100362 if (size != NULL)
363 *size = (size_t)-2;
Victor Stinner4e314432010-10-07 21:45:39 +0000364 return NULL;
365 }
366 if (converted == (size_t)-1) {
367 /* Conversion error. Escape as UTF-8b, and start over
368 in the initial shift state. */
369 *out++ = 0xdc00 + *in++;
370 argsize--;
371 memset(&mbs, 0, sizeof mbs);
372 continue;
373 }
Victor Stinner76df43d2012-10-30 01:42:39 +0100374 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000375 /* Surrogate character. Escape the original
376 byte sequence with surrogateescape. */
377 argsize -= converted;
378 while (converted--)
379 *out++ = 0xdc00 + *in++;
380 continue;
381 }
382 /* successfully converted some bytes */
383 in += converted;
384 argsize -= converted;
385 out++;
386 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100387 if (size != NULL)
388 *size = out - res;
Victor Stinnere2623772012-11-12 23:04:02 +0100389#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000390 /* Cannot use C locale for escaping; manually escape as if charset
391 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
392 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100393 res = decode_ascii_surrogateescape(arg, size);
394 if (res == NULL)
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100395 goto oom;
Victor Stinnere2623772012-11-12 23:04:02 +0100396#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000397 return res;
398oom:
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100399 if (size != NULL)
400 *size = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000401 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100402#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000403}
404
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200405/* Encode a wide character string to the locale encoding with the
406 surrogateescape error handler: surrogate characters in the range
407 U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
Victor Stinner4e314432010-10-07 21:45:39 +0000408
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200409 Return a pointer to a newly allocated byte string, use PyMem_Free() to free
410 the memory. Return NULL on encoding or memory allocation error.
Victor Stinner4e314432010-10-07 21:45:39 +0000411
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200412 If error_pos is not NULL, *error_pos is set to the index of the invalid
413 character on encoding error, or set to (size_t)-1 otherwise.
Victor Stinner2f02a512010-11-08 22:43:46 +0000414
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200415 Use the Py_DecodeLocale() function to decode the bytes string back to a wide
416 character string. */
Victor Stinner4e314432010-10-07 21:45:39 +0000417char*
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200418Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000419{
Victor Stinnere2623772012-11-12 23:04:02 +0100420#ifdef __APPLE__
421 Py_ssize_t len;
422 PyObject *unicode, *bytes = NULL;
423 char *cpath;
424
425 unicode = PyUnicode_FromWideChar(text, wcslen(text));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100426 if (unicode == NULL)
Victor Stinnere2623772012-11-12 23:04:02 +0100427 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100428
429 bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
430 Py_DECREF(unicode);
431 if (bytes == NULL) {
432 PyErr_Clear();
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100433 if (error_pos != NULL)
434 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100435 return NULL;
436 }
437
438 len = PyBytes_GET_SIZE(bytes);
439 cpath = PyMem_Malloc(len+1);
440 if (cpath == NULL) {
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100441 PyErr_Clear();
Victor Stinnere2623772012-11-12 23:04:02 +0100442 Py_DECREF(bytes);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100443 if (error_pos != NULL)
444 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100445 return NULL;
446 }
447 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
448 Py_DECREF(bytes);
449 return cpath;
450#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000451 const size_t len = wcslen(text);
452 char *result = NULL, *bytes = NULL;
453 size_t i, size, converted;
454 wchar_t c, buf[2];
455
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100456#ifndef MS_WINDOWS
457 if (force_ascii == -1)
458 force_ascii = check_force_ascii();
459
460 if (force_ascii)
461 return encode_ascii_surrogateescape(text, error_pos);
462#endif
463
Victor Stinner4e314432010-10-07 21:45:39 +0000464 /* The function works in two steps:
465 1. compute the length of the output buffer in bytes (size)
466 2. outputs the bytes */
467 size = 0;
468 buf[1] = 0;
469 while (1) {
470 for (i=0; i < len; i++) {
471 c = text[i];
472 if (c >= 0xdc80 && c <= 0xdcff) {
473 /* UTF-8b surrogate */
474 if (bytes != NULL) {
475 *bytes++ = c - 0xdc00;
476 size--;
477 }
478 else
479 size++;
480 continue;
481 }
482 else {
483 buf[0] = c;
484 if (bytes != NULL)
485 converted = wcstombs(bytes, buf, size);
486 else
487 converted = wcstombs(NULL, buf, 0);
488 if (converted == (size_t)-1) {
489 if (result != NULL)
490 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000491 if (error_pos != NULL)
492 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000493 return NULL;
494 }
495 if (bytes != NULL) {
496 bytes += converted;
497 size -= converted;
498 }
499 else
500 size += converted;
501 }
502 }
503 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100504 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000505 break;
506 }
507
508 size += 1; /* nul byte at the end */
509 result = PyMem_Malloc(size);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100510 if (result == NULL) {
511 if (error_pos != NULL)
512 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000513 return NULL;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100514 }
Victor Stinner4e314432010-10-07 21:45:39 +0000515 bytes = result;
516 }
517 return result;
Victor Stinnere2623772012-11-12 23:04:02 +0100518#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000519}
520
Victor Stinner4e314432010-10-07 21:45:39 +0000521/* In principle, this should use HAVE__WSTAT, and _wstat
522 should be detected by autoconf. However, no current
523 POSIX system provides that function, so testing for
524 it is pointless.
525 Not sure whether the MS_WINDOWS guards are necessary:
526 perhaps for cygwin/mingw builds?
527*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000528#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000529
/* Get the status of the file designated by a wide character path: the path
   is encoded to the locale encoding with Py_EncodeLocale() and passed to
   stat().

   Return the result of stat(). If the path cannot be encoded, set errno to
   EINVAL and return -1. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded_path = Py_EncodeLocale(path, NULL);

    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    status = stat(encoded_path, buf);
    PyMem_Free(encoded_path);
    return status;
}
546#endif
547
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100548
Steve Dowerf2f373f2015-02-21 08:44:05 -0800549#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
550
551#ifdef MS_WINDOWS
552static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
553
554static void
555FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
556{
557 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
558 /* Cannot simply cast and dereference in_ptr,
559 since it might not be aligned properly */
560 __int64 in;
561 memcpy(&in, in_ptr, sizeof(in));
562 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
563 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
564}
565
566void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800567_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800568{
569 /* XXX endianness */
570 __int64 out;
571 out = time_in + secs_between_epochs;
572 out = out * 10000000 + nsec_in / 100;
573 memcpy(out_ptr, &out, sizeof(out));
574}
575
576/* Below, we *know* that ugo+r is 0444 */
577#if _S_IREAD != 0400
578#error Unsupported C library
579#endif
580static int
581attributes_to_mode(DWORD attr)
582{
583 int m = 0;
584 if (attr & FILE_ATTRIBUTE_DIRECTORY)
585 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
586 else
587 m |= _S_IFREG;
588 if (attr & FILE_ATTRIBUTE_READONLY)
589 m |= 0444;
590 else
591 m |= 0666;
592 return m;
593}
594
Steve Dowerbf1f3762015-02-21 15:26:02 -0800595void
Steve Dowera2af1a52015-02-21 10:04:10 -0800596_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800597{
598 memset(result, 0, sizeof(*result));
599 result->st_mode = attributes_to_mode(info->dwFileAttributes);
600 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
601 result->st_dev = info->dwVolumeSerialNumber;
602 result->st_rdev = result->st_dev;
603 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
604 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
605 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
606 result->st_nlink = info->nNumberOfLinks;
607 result->st_ino = (((__int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow;
608 if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
609 /* first clear the S_IFMT bits */
610 result->st_mode ^= (result->st_mode & S_IFMT);
611 /* now set the bits that make this a symlink */
612 result->st_mode |= S_IFLNK;
613 }
614 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800615}
616#endif
617
/* Return information about the file referred to by the descriptor fd.

   On POSIX, this is a plain call to fstat().

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   because they support files larger than 2 GB. fstat() may fail with
   EOVERFLOW on files larger than 2 GB because the file size type is a signed
   32-bit integer: see issue #23152.

   Return 0 on success and -1 on error. On Windows, errno is cleared and the
   error has to be retrieved with GetLastError().
 */
int
_Py_fstat(int fd, struct _Py_stat_struct *result)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE handle;
    int file_type;

    handle = _PyVerify_fd(fd) ? (HANDLE)_get_osfhandle(fd)
                              : INVALID_HANDLE_VALUE;

    /* Protocol violation: we explicitly clear errno, instead of
       setting it to a POSIX error. Callers should use GetLastError. */
    errno = 0;

    if (handle == INVALID_HANDLE_VALUE) {
        /* This is really a C library error (invalid file handle).
           We set the Win32 error to the closest one matching. */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(result, 0, sizeof(*result));

    file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN && GetLastError() != 0) {
        return -1;
    }
    /* else: valid but unknown file */

    switch (file_type) {
    case FILE_TYPE_DISK:
        /* regular file on disk: query the full information below */
        break;
    case FILE_TYPE_CHAR:
        result->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        result->st_mode = _S_IFIFO;
        return 0;
    default:
        /* nothing more is known: leave *result zeroed */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &info)) {
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, result);
    /* specific to fstat() */
    result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, result);
#endif
}
681#endif /* HAVE_FSTAT || MS_WINDOWS */
682
683
684#ifdef HAVE_STAT
Victor Stinner6672d0c2010-10-07 22:53:43 +0000685/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
686 call stat() otherwise. Only fill st_mode attribute on Windows.
687
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100688 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
689 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000690
691int
Victor Stinnera4a75952010-10-07 22:23:10 +0000692_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000693{
694#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000695 int err;
696 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100697 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000698
Victor Stinneree587ea2011-11-17 00:51:38 +0100699 wpath = PyUnicode_AsUnicode(path);
700 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100701 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100702 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000703 if (!err)
704 statbuf->st_mode = wstatbuf.st_mode;
705 return err;
706#else
707 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000708 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000709 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100710 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000711 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
712 Py_DECREF(bytes);
713 return ret;
714#endif
715}
716
Steve Dowerf2f373f2015-02-21 08:44:05 -0800717#endif /* HAVE_STAT */
718
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100719
Antoine Pitrou409b5382013-10-12 22:41:17 +0200720static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200721get_inheritable(int fd, int raise)
722{
723#ifdef MS_WINDOWS
724 HANDLE handle;
725 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000726
Victor Stinnerdaf45552013-08-28 00:53:59 +0200727 if (!_PyVerify_fd(fd)) {
728 if (raise)
729 PyErr_SetFromErrno(PyExc_OSError);
730 return -1;
731 }
732
733 handle = (HANDLE)_get_osfhandle(fd);
734 if (handle == INVALID_HANDLE_VALUE) {
735 if (raise)
736 PyErr_SetFromWindowsErr(0);
737 return -1;
738 }
739
740 if (!GetHandleInformation(handle, &flags)) {
741 if (raise)
742 PyErr_SetFromWindowsErr(0);
743 return -1;
744 }
745
746 return (flags & HANDLE_FLAG_INHERIT);
747#else
748 int flags;
749
750 flags = fcntl(fd, F_GETFD, 0);
751 if (flags == -1) {
752 if (raise)
753 PyErr_SetFromErrno(PyExc_OSError);
754 return -1;
755 }
756 return !(flags & FD_CLOEXEC);
757#endif
758}
759
/* Public entry point to read the inheritable flag of a file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
768
769static int
770set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
771{
772#ifdef MS_WINDOWS
773 HANDLE handle;
774 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +0200775#else
776#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
777 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200778 int request;
779 int err;
Victor Stinner282124b2014-09-02 11:41:04 +0200780#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +0200781 int flags;
782 int res;
783#endif
784
785 /* atomic_flag_works can only be used to make the file descriptor
786 non-inheritable */
787 assert(!(atomic_flag_works != NULL && inheritable));
788
789 if (atomic_flag_works != NULL && !inheritable) {
790 if (*atomic_flag_works == -1) {
791 int inheritable = get_inheritable(fd, raise);
792 if (inheritable == -1)
793 return -1;
794 *atomic_flag_works = !inheritable;
795 }
796
797 if (*atomic_flag_works)
798 return 0;
799 }
800
801#ifdef MS_WINDOWS
802 if (!_PyVerify_fd(fd)) {
803 if (raise)
804 PyErr_SetFromErrno(PyExc_OSError);
805 return -1;
806 }
807
808 handle = (HANDLE)_get_osfhandle(fd);
809 if (handle == INVALID_HANDLE_VALUE) {
810 if (raise)
811 PyErr_SetFromWindowsErr(0);
812 return -1;
813 }
814
815 if (inheritable)
816 flags = HANDLE_FLAG_INHERIT;
817 else
818 flags = 0;
819 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
820 if (raise)
821 PyErr_SetFromWindowsErr(0);
822 return -1;
823 }
824 return 0;
825
Victor Stinnerdaf45552013-08-28 00:53:59 +0200826#else
Victor Stinner282124b2014-09-02 11:41:04 +0200827
828#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
829 if (ioctl_works != 0) {
830 /* fast-path: ioctl() only requires one syscall */
831 if (inheritable)
832 request = FIONCLEX;
833 else
834 request = FIOCLEX;
835 err = ioctl(fd, request, NULL);
836 if (!err) {
837 ioctl_works = 1;
838 return 0;
839 }
840
841 if (errno != ENOTTY) {
842 if (raise)
843 PyErr_SetFromErrno(PyExc_OSError);
844 return -1;
845 }
846 else {
847 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
848 device". The ioctl is declared but not supported by the kernel.
849 Remember that ioctl() doesn't work. It is the case on
850 Illumos-based OS for example. */
851 ioctl_works = 0;
852 }
853 /* fallback to fcntl() if ioctl() does not work */
854 }
855#endif
856
857 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +0200858 flags = fcntl(fd, F_GETFD);
859 if (flags < 0) {
860 if (raise)
861 PyErr_SetFromErrno(PyExc_OSError);
862 return -1;
863 }
864
865 if (inheritable)
866 flags &= ~FD_CLOEXEC;
867 else
868 flags |= FD_CLOEXEC;
869 res = fcntl(fd, F_SETFD, flags);
870 if (res < 0) {
871 if (raise)
872 PyErr_SetFromErrno(PyExc_OSError);
873 return -1;
874 }
875 return 0;
876#endif
877}
878
879/* Make the file descriptor non-inheritable.
Victor Stinnerb034eee2013-09-07 10:36:04 +0200880 Return 0 on success, set errno and return -1 on error. */
Victor Stinnerdaf45552013-08-28 00:53:59 +0200881static int
882make_non_inheritable(int fd)
883{
884 return set_inheritable(fd, 0, 0, NULL);
885}
886
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, read the inheritable flag of the file
      descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing more; otherwise set it to 0 and
      make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Pass atomic_flag_works=NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
909
/* Wrapper around open() that creates the file descriptor non-inheritable.

   pathname and flags have the same meaning as for open().

   Return the new file descriptor on success. On error, return a negative
   value; no Python exception is raised. If the file was opened but could
   not be made non-inheritable, it is closed again and -1 is returned. */
int
_Py_open(const char *pathname, int flags)
{
#ifdef MS_WINDOWS
    /* O_NOINHERIT makes the new descriptor non-inheritable at creation */
    return open(pathname, flags | O_NOINHERIT);
#else
    int *cloexec_works = NULL;
    int fd;

#ifdef O_CLOEXEC
    /* Request close-on-exec atomically; _Py_open_cloexec_works records
       whether open() actually honours the flag. */
    cloexec_works = &_Py_open_cloexec_works;
    flags |= O_CLOEXEC;
#endif

    fd = open(pathname, flags);
    if (fd < 0) {
        return fd;
    }

    if (set_inheritable(fd, 0, 0, cloexec_works) < 0) {
        close(fd);
        return -1;
    }
    return fd;
#endif /* !MS_WINDOWS */
}
940
/* Open a file from a wide-character path and mode.

   On Windows, call _wfopen() directly. Elsewhere, convert the mode with
   wcstombs(), encode the path to the locale encoding and call fopen().
   The file descriptor is created non-inheritable.

   Return the new stream on success, NULL on error. errno is set to EINVAL
   if the mode cannot be converted or needs 10 bytes or more. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *fp;
#ifdef MS_WINDOWS
    fp = _wfopen(path, mode);
#else
    char mode_bytes[10];
    char *path_bytes;
    size_t mode_len;

    mode_len = wcstombs(mode_bytes, mode, sizeof(mode_bytes));
    /* a result equal to the buffer size means no room for the trailing
       null byte */
    if (mode_len == (size_t)-1 || mode_len >= sizeof(mode_bytes)) {
        errno = EINVAL;
        return NULL;
    }

    path_bytes = Py_EncodeLocale(path, NULL);
    if (path_bytes == NULL) {
        return NULL;
    }
    fp = fopen(path_bytes, mode_bytes);
    PyMem_Free(path_bytes);
#endif

    if (fp != NULL && make_non_inheritable(fileno(fp)) < 0) {
        fclose(fp);
        fp = NULL;
    }
    return fp;
}
973
/* Wrapper around fopen() that creates the file descriptor non-inheritable.

   Return the new stream on success, NULL on error. If the file was opened
   but its descriptor could not be made non-inheritable, the stream is
   closed again and NULL is returned. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *fp;

    fp = fopen(pathname, mode);
    if (fp != NULL && make_non_inheritable(fileno(fp)) < 0) {
        fclose(fp);
        fp = NULL;
    }
    return fp;
}
987
988/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
989 encoding and call fopen() otherwise. The file descriptor is created
990 non-inheritable.
Victor Stinner6672d0c2010-10-07 22:53:43 +0000991
992 Return the new file object on success, or NULL if the file cannot be open or
Victor Stinnerdaf45552013-08-28 00:53:59 +0200993 (if PyErr_Occurred()) on unicode error. */
Victor Stinner4e314432010-10-07 21:45:39 +0000994FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +0200995_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000996{
Victor Stinnerdaf45552013-08-28 00:53:59 +0200997 FILE *f;
Victor Stinner4e314432010-10-07 21:45:39 +0000998#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100999 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001000 wchar_t wmode[10];
1001 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001002
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001003 if (!PyUnicode_Check(path)) {
1004 PyErr_Format(PyExc_TypeError,
1005 "str file path expected under Windows, got %R",
1006 Py_TYPE(path));
1007 return NULL;
1008 }
Victor Stinneree587ea2011-11-17 00:51:38 +01001009 wpath = PyUnicode_AsUnicode(path);
1010 if (wpath == NULL)
1011 return NULL;
1012
Victor Stinner4e314432010-10-07 21:45:39 +00001013 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
1014 if (usize == 0)
1015 return NULL;
1016
Victor Stinnerdaf45552013-08-28 00:53:59 +02001017 f = _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +00001018#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001019 PyObject *bytes;
1020 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001021 return NULL;
1022 f = fopen(PyBytes_AS_STRING(bytes), mode);
1023 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001024#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001025 if (f == NULL)
1026 return NULL;
1027 if (make_non_inheritable(fileno(f)) < 0) {
1028 fclose(f);
1029 return NULL;
1030 }
1031 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001032}
1033
1034#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +00001035
1036/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001037 the result from the locale encoding. Return -1 on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001038
Victor Stinner4e314432010-10-07 21:45:39 +00001039int
1040_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
1041{
1042 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001043 char cbuf[MAXPATHLEN];
Victor Stinner3f711f42010-10-16 22:47:37 +00001044 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +00001045 int res;
1046 size_t r1;
1047
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001048 cpath = Py_EncodeLocale(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001049 if (cpath == NULL) {
1050 errno = EINVAL;
1051 return -1;
1052 }
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001053 res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
Victor Stinner4e314432010-10-07 21:45:39 +00001054 PyMem_Free(cpath);
1055 if (res == -1)
1056 return -1;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001057 if (res == Py_ARRAY_LENGTH(cbuf)) {
Victor Stinner4e314432010-10-07 21:45:39 +00001058 errno = EINVAL;
1059 return -1;
1060 }
1061 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001062 wbuf = Py_DecodeLocale(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +00001063 if (wbuf == NULL) {
1064 errno = EINVAL;
1065 return -1;
1066 }
Victor Stinner3f711f42010-10-16 22:47:37 +00001067 if (bufsiz <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001068 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001069 errno = EINVAL;
1070 return -1;
1071 }
Victor Stinner3f711f42010-10-16 22:47:37 +00001072 wcsncpy(buf, wbuf, bufsiz);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001073 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001074 return (int)r1;
1075}
1076#endif
1077
1078#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001079
1080/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001081 encoding, decode the result from the locale encoding.
1082 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001083
Victor Stinner4e314432010-10-07 21:45:39 +00001084wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001085_Py_wrealpath(const wchar_t *path,
1086 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +00001087{
1088 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001089 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001090 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001091 char *res;
1092 size_t r;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001093 cpath = Py_EncodeLocale(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001094 if (cpath == NULL) {
1095 errno = EINVAL;
1096 return NULL;
1097 }
1098 res = realpath(cpath, cresolved_path);
1099 PyMem_Free(cpath);
1100 if (res == NULL)
1101 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001102
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001103 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001104 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001105 errno = EINVAL;
1106 return NULL;
1107 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001108 if (resolved_path_size <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001109 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001110 errno = EINVAL;
1111 return NULL;
1112 }
1113 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001114 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001115 return resolved_path;
1116}
1117#endif
1118
Victor Stinnerf4061da2010-10-14 12:37:19 +00001119/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001120 including the null character. Decode the path from the locale encoding.
1121 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001122
Victor Stinner4e314432010-10-07 21:45:39 +00001123wchar_t*
1124_Py_wgetcwd(wchar_t *buf, size_t size)
1125{
1126#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +02001127 int isize = (int)Py_MIN(size, INT_MAX);
1128 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +00001129#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001130 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001131 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001132 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001133
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001134 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001135 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001136 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001137 if (wname == NULL)
1138 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +00001139 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001140 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001141 return NULL;
1142 }
Victor Stinnerf4061da2010-10-14 12:37:19 +00001143 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001144 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001145 return buf;
1146#endif
1147}
1148
Victor Stinnerdaf45552013-08-28 00:53:59 +02001149/* Duplicate a file descriptor. The new file descriptor is created as
1150 non-inheritable. Return a new file descriptor on success, raise an OSError
1151 exception and return -1 on error.
1152
1153 The GIL is released to call dup(). The caller must hold the GIL. */
1154int
1155_Py_dup(int fd)
1156{
1157#ifdef MS_WINDOWS
1158 HANDLE handle;
1159 DWORD ftype;
1160#endif
1161
1162 if (!_PyVerify_fd(fd)) {
1163 PyErr_SetFromErrno(PyExc_OSError);
1164 return -1;
1165 }
1166
1167#ifdef MS_WINDOWS
1168 handle = (HANDLE)_get_osfhandle(fd);
1169 if (handle == INVALID_HANDLE_VALUE) {
1170 PyErr_SetFromWindowsErr(0);
1171 return -1;
1172 }
1173
1174 /* get the file type, ignore the error if it failed */
1175 ftype = GetFileType(handle);
1176
1177 Py_BEGIN_ALLOW_THREADS
1178 fd = dup(fd);
1179 Py_END_ALLOW_THREADS
1180 if (fd < 0) {
1181 PyErr_SetFromErrno(PyExc_OSError);
1182 return -1;
1183 }
1184
1185 /* Character files like console cannot be make non-inheritable */
1186 if (ftype != FILE_TYPE_CHAR) {
1187 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1188 close(fd);
1189 return -1;
1190 }
1191 }
1192#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1193 Py_BEGIN_ALLOW_THREADS
1194 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1195 Py_END_ALLOW_THREADS
1196 if (fd < 0) {
1197 PyErr_SetFromErrno(PyExc_OSError);
1198 return -1;
1199 }
1200
1201#else
1202 Py_BEGIN_ALLOW_THREADS
1203 fd = dup(fd);
1204 Py_END_ALLOW_THREADS
1205 if (fd < 0) {
1206 PyErr_SetFromErrno(PyExc_OSError);
1207 return -1;
1208 }
1209
1210 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1211 close(fd);
1212 return -1;
1213 }
1214#endif
1215 return fd;
1216}
1217
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001218#ifndef MS_WINDOWS
1219/* Get the blocking mode of the file descriptor.
1220 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1221 raise an exception and return -1 on error. */
1222int
1223_Py_get_blocking(int fd)
1224{
1225 int flags = fcntl(fd, F_GETFL, 0);
1226 if (flags < 0) {
1227 PyErr_SetFromErrno(PyExc_OSError);
1228 return -1;
1229 }
1230
1231 return !(flags & O_NONBLOCK);
1232}
1233
1234/* Set the blocking mode of the specified file descriptor.
1235
1236 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1237 otherwise.
1238
1239 Return 0 on success, raise an exception and return -1 on error. */
1240int
1241_Py_set_blocking(int fd, int blocking)
1242{
1243#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1244 int arg = !blocking;
1245 if (ioctl(fd, FIONBIO, &arg) < 0)
1246 goto error;
1247#else
1248 int flags, res;
1249
1250 flags = fcntl(fd, F_GETFL, 0);
1251 if (flags < 0)
1252 goto error;
1253
1254 if (blocking)
1255 flags = flags & (~O_NONBLOCK);
1256 else
1257 flags = flags | O_NONBLOCK;
1258
1259 res = fcntl(fd, F_SETFL, flags);
1260 if (res < 0)
1261 goto error;
1262#endif
1263 return 0;
1264
1265error:
1266 PyErr_SetFromErrno(PyExc_OSError);
1267 return -1;
1268}
1269#endif
1270
Steve Dowerd81431f2015-03-06 14:47:02 -08001271#ifdef _MSC_VER
1272#if _MSC_VER >= 1900
1273
/* Let the Windows CRT validate the file descriptor fd without terminating
   the process if it is invalid.

   Return 1 if fd is valid, 0 otherwise. For a negative fd, errno is set
   to EBADF. */
int
_PyVerify_fd(int fd)
{
    /* Fast check for the only condition we know */
    if (fd < 0) {
        _set_errno(EBADF);
        return 0;
    }

    /* the CRT reports an invalid descriptor as (intptr_t)-1 */
    return _get_osfhandle(fd) != (intptr_t)-1;
}
1288
1289#elif _MSC_VER >= 1400
1290/* Legacy implementation of _PyVerify_fd while transitioning to
1291 * MSVC 14.0. This should eventually be removed. (issue23524)
1292 */
1293
1294/* Microsoft CRT in VS2005 and higher will verify that a filehandle is
1295 * valid and raise an assertion if it isn't.
1296 * Normally, an invalid fd is likely to be a C program error and therefore
1297 * an assertion can be useful, but it does contradict the POSIX standard
1298 * which for write(2) states:
1299 * "Otherwise, -1 shall be returned and errno set to indicate the error."
1300 * "[EBADF] The fildes argument is not a valid file descriptor open for
1301 * writing."
1302 * Furthermore, python allows the user to enter any old integer
1303 * as a fd and should merely raise a python exception on error.
1304 * The Microsoft CRT doesn't provide an official way to check for the
1305 * validity of a file descriptor, but we can emulate its internal behaviour
1306 * by using the exported __pinfo data member and knowledge of the
1307 * internal structures involved.
1308 * The structures below must be updated for each version of visual studio
1309 * according to the file internal.h in the CRT source, until MS comes
1310 * up with a less hacky way to do this.
1311 * (all of this is to avoid globally modifying the CRT behaviour using
1312 * _set_invalid_parameter_handler() and _CrtSetReportMode())
1313 */
1314/* The actual size of the structure is determined at runtime.
1315 * Only the first items must be present.
1316 */
1317typedef struct {
1318 intptr_t osfhnd;
1319 char osfile;
1320} my_ioinfo;
1321
1322extern __declspec(dllimport) char * __pioinfo[];
1323#define IOINFO_L2E 5
1324#define IOINFO_ARRAYS 64
1325#define IOINFO_ARRAY_ELTS (1 << IOINFO_L2E)
1326#define _NHANDLE_ (IOINFO_ARRAYS * IOINFO_ARRAY_ELTS)
1327#define FOPEN 0x01
1328#define _NO_CONSOLE_FILENO (intptr_t)-2
1329
1330/* This function emulates what the windows CRT does to validate file handles */
1331int
1332_PyVerify_fd(int fd)
1333{
1334 const int i1 = fd >> IOINFO_L2E;
1335 const int i2 = fd & ((1 << IOINFO_L2E) - 1);
1336
1337 static size_t sizeof_ioinfo = 0;
1338
1339 /* Determine the actual size of the ioinfo structure,
1340 * as used by the CRT loaded in memory
1341 */
1342 if (sizeof_ioinfo == 0 && __pioinfo[0] != NULL) {
1343 sizeof_ioinfo = _msize(__pioinfo[0]) / IOINFO_ARRAY_ELTS;
1344 }
1345 if (sizeof_ioinfo == 0) {
1346 /* This should not happen... */
1347 goto fail;
1348 }
1349
1350 /* See that it isn't a special CLEAR fileno */
1351 if (fd != _NO_CONSOLE_FILENO) {
1352 /* Microsoft CRT would check that 0<=fd<_nhandle but we can't do that. Instead
1353 * we check pointer validity and other info
1354 */
1355 if (0 <= i1 && i1 < IOINFO_ARRAYS && __pioinfo[i1] != NULL) {
1356 /* finally, check that the file is open */
1357 my_ioinfo* info = (my_ioinfo*)(__pioinfo[i1] + i2 * sizeof_ioinfo);
1358 if (info->osfile & FOPEN) {
1359 return 1;
1360 }
1361 }
1362 }
1363 fail:
1364 errno = EBADF;
1365 return 0;
1366}
1367
1368#endif /* _MSC_VER >= 1900 || _MSC_VER >= 1400 */
1369#endif /* defined _MSC_VER */