blob: 5e71d375260a18190a64c95a5ecae38f90f21482 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08006# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00007# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -04008extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +00009#endif
Victor Stinner4e314432010-10-07 21:45:39 +000010
Brett Cannonefb00c02012-02-29 18:31:31 -050011#ifdef HAVE_LANGINFO_H
12#include <langinfo.h>
13#endif
14
Victor Stinnerdaf45552013-08-28 00:53:59 +020015#ifdef HAVE_SYS_IOCTL_H
16#include <sys/ioctl.h>
17#endif
18
19#ifdef HAVE_FCNTL_H
20#include <fcntl.h>
21#endif /* HAVE_FCNTL_H */
22
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() supports O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
34
Brett Cannonefb00c02012-02-29 18:31:31 -050035PyObject *
36_Py_device_encoding(int fd)
37{
Victor Stinner14b9b112013-06-25 00:37:25 +020038#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050039 UINT cp;
40#endif
Steve Dower8fc89802015-04-12 00:26:27 -040041 int valid;
42 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070043 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040044 _Py_END_SUPPRESS_IPH
45 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050046 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040047
Victor Stinner14b9b112013-06-25 00:37:25 +020048#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050049 if (fd == 0)
50 cp = GetConsoleCP();
51 else if (fd == 1 || fd == 2)
52 cp = GetConsoleOutputCP();
53 else
54 cp = 0;
55 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
56 has no console */
57 if (cp != 0)
58 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
59#elif defined(CODESET)
60 {
61 char *codeset = nl_langinfo(CODESET);
62 if (codeset != NULL && codeset[0] != 0)
63 return PyUnicode_FromString(codeset);
64 }
65#endif
66 Py_RETURN_NONE;
67}
68
Victor Stinner7ed7aea2018-01-15 10:45:49 +010069#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
70
71#define USE_FORCE_ASCII
72
Victor Stinnerd45c7f82012-12-04 01:34:47 +010073extern int _Py_normalize_encoding(const char *, char *, size_t);
74
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
   On these operating systems, nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
   at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses encode_ascii() and
          Py_DecodeLocale() uses decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
99
/* Decide whether the ASCII workaround must be used for the locale encoding.

   Return 0 if the LC_CTYPE locale is neither "C" nor "POSIX", if
   nl_langinfo(CODESET) is not ASCII (or one of its aliases), or if no byte
   in the range 0x80-0xff can be decoded by mbstowcs().

   Return 1 if the locale announces ASCII but mbstowcs() decodes at least
   one byte in the range 0x80-0xff, if nl_langinfo(CODESET) is not available,
   or on any error (locale or codeset cannot be retrieved or normalized). */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    char *codeset;
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    int is_ascii;
    unsigned int i;
    const char * const *alias;
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        char ch[2];
        wchar_t wch;
        size_t res;

        /* mbstowcs() takes a null-terminated multibyte string: terminate the
           one-byte probe so the function cannot read past it. */
        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding succeed:
               the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
182
Victor Stinner21220bb2018-10-30 12:59:20 +0100183
184int
185_Py_GetForceASCII(void)
186{
187 if (force_ascii == -1) {
188 force_ascii = check_force_ascii();
189 }
190 return force_ascii;
191}
192
193
Victor Stinnerf6e323c2018-11-23 13:37:42 +0100194void
195_Py_ResetForceASCII(void)
196{
197 force_ascii = -1;
198}
199
Victor Stinner21220bb2018-10-30 12:59:20 +0100200
/* Encode a wide character string to ASCII.

   Characters <= 0x7f are copied as a single byte. If surrogateescape is
   non-zero, characters in range U+DC80..U+DCFF are converted to bytes
   0x80..0xFF. Any other character is an encoding error.

   The buffer is allocated with PyMem_RawMalloc() if raw_malloc is non-zero,
   with PyMem_Malloc() otherwise; it must be released with the matching
   free function.

   Return 0 on success and store the null-terminated result into *str.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the buffer is freed, *str is left untouched,
   the index of the offending character is written into *error_pos (if not
   NULL) and an error message into *reason (if not NULL). */
static int
encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
{
    char *result = NULL, *out;
    size_t len, i;
    wchar_t ch;

    len = wcslen(text);

    /* +1 for NULL byte */
    if (raw_malloc) {
        result = PyMem_RawMalloc(len + 1);
    }
    else {
        result = PyMem_Malloc(len + 1);
    }
    if (result == NULL) {
        return -1;
    }

    out = result;
    for (i=0; i<len; i++) {
        ch = text[i];

        if (ch <= 0x7f) {
            /* ASCII character */
            *out++ = (char)ch;
        }
        else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
            /* UTF-8b surrogate */
            *out++ = (char)(ch - 0xdc00);
        }
        else {
            if (raw_malloc) {
                PyMem_RawFree(result);
            }
            else {
                PyMem_Free(result);
            }
            if (error_pos != NULL) {
                *error_pos = i;
            }
            if (reason) {
                *reason = "encoding error";
            }
            return -2;
        }
    }
    *out = '\0';
    *str = result;
    return 0;
}
Victor Stinner7d35f792018-10-30 14:32:01 +0100255#else
/* The ASCII workaround is never used on this platform. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinnerf6e323c2018-11-23 13:37:42 +0100261
/* No force_ascii cache exists on this platform. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100267#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100268
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100269
270#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
271static int
272decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
273 const char **reason, int surrogateescape)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100274{
275 wchar_t *res;
276 unsigned char *in;
277 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600278 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100279
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100280 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
281 return -1;
282 }
283 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
284 if (!res) {
285 return -1;
286 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100287
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100288 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100289 for (in = (unsigned char*)arg; *in; in++) {
290 unsigned char ch = *in;
291 if (ch < 128) {
292 *out++ = ch;
293 }
294 else {
295 if (!surrogateescape) {
296 PyMem_RawFree(res);
297 if (wlen) {
298 *wlen = in - (unsigned char*)arg;
299 }
300 if (reason) {
301 *reason = "decoding error";
302 }
303 return -2;
304 }
305 *out++ = 0xdc00 + ch;
306 }
307 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100308 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100309
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100310 if (wlen != NULL) {
311 *wlen = out - res;
312 }
313 *wstr = res;
314 return 0;
315}
316#endif /* !HAVE_MBRTOWC */
317
318static int
319decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
320 const char **reason, int surrogateescape)
Victor Stinner4e314432010-10-07 21:45:39 +0000321{
322 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100323 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000324 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200325#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000326 unsigned char *in;
327 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000328 mbstate_t mbs;
329#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100330
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100331#ifdef HAVE_BROKEN_MBSTOWCS
332 /* Some platforms have a broken implementation of
333 * mbstowcs which does not count the characters that
334 * would result from conversion. Use an upper bound.
335 */
336 argsize = strlen(arg);
337#else
338 argsize = mbstowcs(NULL, arg, 0);
339#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000340 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100341 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
342 return -1;
343 }
344 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
345 if (!res) {
346 return -1;
347 }
348
349 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000350 if (count != (size_t)-1) {
351 wchar_t *tmp;
352 /* Only use the result if it contains no
353 surrogate characters. */
354 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100355 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000356 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000357 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100358 if (wlen != NULL) {
359 *wlen = count;
360 }
361 *wstr = res;
362 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000363 }
Victor Stinner4e314432010-10-07 21:45:39 +0000364 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200365 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000366 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100367
Victor Stinner4e314432010-10-07 21:45:39 +0000368 /* Conversion failed. Fall back to escaping with surrogateescape. */
369#ifdef HAVE_MBRTOWC
370 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
371
372 /* Overallocate; as multi-byte characters are in the argument, the
373 actual output could use less memory. */
374 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100375 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
376 return -1;
377 }
378 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
379 if (!res) {
380 return -1;
381 }
382
Victor Stinner4e314432010-10-07 21:45:39 +0000383 in = (unsigned char*)arg;
384 out = res;
385 memset(&mbs, 0, sizeof mbs);
386 while (argsize) {
387 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100388 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000389 /* Reached end of string; null char stored. */
390 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100391 }
392
Victor Stinner4e314432010-10-07 21:45:39 +0000393 if (converted == (size_t)-2) {
394 /* Incomplete character. This should never happen,
395 since we provide everything that we have -
396 unless there is a bug in the C library, or I
397 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100398 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000399 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100400
Victor Stinner4e314432010-10-07 21:45:39 +0000401 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100402 if (!surrogateescape) {
403 goto decode_error;
404 }
405
Victor Stinner4e314432010-10-07 21:45:39 +0000406 /* Conversion error. Escape as UTF-8b, and start over
407 in the initial shift state. */
408 *out++ = 0xdc00 + *in++;
409 argsize--;
410 memset(&mbs, 0, sizeof mbs);
411 continue;
412 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100413
Victor Stinner76df43d2012-10-30 01:42:39 +0100414 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100415 if (!surrogateescape) {
416 goto decode_error;
417 }
418
Victor Stinner4e314432010-10-07 21:45:39 +0000419 /* Surrogate character. Escape the original
420 byte sequence with surrogateescape. */
421 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100422 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000423 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100424 }
Victor Stinner4e314432010-10-07 21:45:39 +0000425 continue;
426 }
427 /* successfully converted some bytes */
428 in += converted;
429 argsize -= converted;
430 out++;
431 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100432 if (wlen != NULL) {
433 *wlen = out - res;
434 }
435 *wstr = res;
436 return 0;
437
438decode_error:
439 PyMem_RawFree(res);
440 if (wlen) {
441 *wlen = in - (unsigned char*)arg;
442 }
443 if (reason) {
444 *reason = "decoding error";
445 }
446 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100447#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000448 /* Cannot use C locale for escaping; manually escape as if charset
449 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
450 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100451 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinnere2623772012-11-12 23:04:02 +0100452#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100453}
454
455
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100456/* Decode a byte string from the locale encoding.
457
458 Use the strict error handler if 'surrogateescape' is zero. Use the
459 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
460 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
461 can be decoded as a surrogate character, escape the bytes using the
462 surrogateescape error handler instead of decoding them.
463
Miss Islington (bot)32955292018-04-20 14:00:41 -0700464 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100465 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
466 the number of wide characters excluding the null character into *wlen.
467
468 On memory allocation failure, return -1.
469
470 On decoding error, return -2. If wlen is not NULL, write the start of
471 invalid byte sequence in the input string into *wlen. If reason is not NULL,
472 write the decoding error message into *reason.
473
474 Use the Py_EncodeLocaleEx() function to encode the character string back to
475 a byte string. */
476int
477_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
478 const char **reason,
479 int current_locale, int surrogateescape)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100480{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100481 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100482#ifdef __ANDROID__
483 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
484 surrogateescape);
485#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100486 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100487#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100488 }
489
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100490#if defined(__APPLE__) || defined(__ANDROID__)
491 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
492 surrogateescape);
493#else
494 if (Py_UTF8Mode == 1) {
495 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
496 surrogateescape);
497 }
498
499#ifdef USE_FORCE_ASCII
500 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100501 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100502 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100503
504 if (force_ascii) {
505 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100506 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100507 }
508#endif
509
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100510 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100511#endif /* __APPLE__ or __ANDROID__ */
512}
513
514
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler: undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Return a pointer to a newly allocated wide character string, use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of
   wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.

   Decoding errors should never happen, unless there is a bug in the C
   library.

   Use the Py_EncodeLocale() function to encode the character string back to a
   byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *wlen)
{
    wchar_t *wstr;
    /* current_locale=0, surrogateescape=1 */
    int res = _Py_DecodeLocaleEx(arg, &wstr, wlen, NULL, 0, 1);
    if (res != 0) {
        if (wlen != NULL) {
            /* res is -1 or -2: report it as (size_t)-1 or (size_t)-2 */
            *wlen = (size_t)res;
        }
        return NULL;
    }
    return wstr;
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100547
Victor Stinner91106cd2017-12-13 12:29:09 +0100548
/* Encode a wide character string to the current locale encoding.

   Characters in range U+DC80..U+DCFF are converted to bytes 0x80..0xFF if
   surrogateescape is non-zero and are an encoding error otherwise. Other
   characters are encoded one at a time with wcstombs().

   The buffer is allocated with PyMem_RawMalloc() if raw_malloc is non-zero,
   with PyMem_Malloc() otherwise.

   Return 0 on success and store the null-terminated result into *str.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the buffer is freed, the index of the
   offending character is written into *error_pos (if not NULL) and an error
   message into *reason (if not NULL). */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, int surrogateescape)
{
    const size_t len = wcslen(text);
    char *result = NULL, *bytes = NULL;
    size_t i, size, converted;
    wchar_t c, buf[2];

    /* The function works in two steps:
       1. compute the length of the output buffer in bytes (size)
       2. outputs the bytes */
    size = 0;
    buf[1] = 0;
    while (1) {
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
                else {
                    size++;
                }
                continue;
            }
            else {
                buf[0] = c;
                if (bytes != NULL) {
                    converted = wcstombs(bytes, buf, size);
                }
                else {
                    converted = wcstombs(NULL, buf, 0);
                }
                if (converted == (size_t)-1) {
                    goto encode_error;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
                else {
                    size += converted;
                }
            }
        }
        if (result != NULL) {
            /* second pass done: terminate the output */
            *bytes = '\0';
            break;
        }

        size += 1; /* nul byte at the end */
        if (raw_malloc) {
            result = PyMem_RawMalloc(size);
        }
        else {
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
            return -1;
        }
        bytes = result;
    }
    *str = result;
    return 0;

encode_error:
    /* result is still NULL if the error occurred during the first pass */
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100636
637static int
638encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
639 const char **reason,
640 int raw_malloc, int current_locale, int surrogateescape)
641{
642 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100643#ifdef __ANDROID__
644 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
645 raw_malloc, surrogateescape);
646#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100647 return encode_current_locale(text, str, error_pos, reason,
648 raw_malloc, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100649#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100650 }
651
652#if defined(__APPLE__) || defined(__ANDROID__)
653 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
654 raw_malloc, surrogateescape);
655#else /* __APPLE__ */
656 if (Py_UTF8Mode == 1) {
657 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
658 raw_malloc, surrogateescape);
659 }
660
661#ifdef USE_FORCE_ASCII
662 if (force_ascii == -1) {
663 force_ascii = check_force_ascii();
664 }
665
666 if (force_ascii) {
667 return encode_ascii(text, str, error_pos, reason,
668 raw_malloc, surrogateescape);
669 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100670#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100671
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100672 return encode_current_locale(text, str, error_pos, reason,
673 raw_malloc, surrogateescape);
674#endif /* __APPLE__ or __ANDROID__ */
675}
676
/* Encode text with encode_locale_ex() using the surrogateescape error
   handler and return the encoded string, or NULL on error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 unless an
   encoding error occurred (encode_locale_ex() returned -2), in which case
   the value written by the encoder is kept. */
static char*
encode_locale(const wchar_t *text, size_t *error_pos,
              int raw_malloc, int current_locale)
{
    char *str;
    int res = encode_locale_ex(text, &str, error_pos, NULL,
                               raw_malloc, current_locale, 1);
    if (res != -2 && error_pos) {
        *error_pos = (size_t)-1;
    }
    if (res != 0) {
        return NULL;
    }
    return str;
}
692
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=0, current_locale=0 */
    return encode_locale(text, error_pos, 0, 0);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100710
Victor Stinner91106cd2017-12-13 12:29:09 +0100711
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
   instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=1, current_locale=0 */
    return encode_locale(text, error_pos, 1, 0);
}
719
720
/* Encode a wide character string to the locale encoding, with explicit
   control over the current_locale and surrogateescape options.

   Thin wrapper over encode_locale_ex() with raw_malloc=1: on success the
   string stored into *str must be freed with PyMem_RawFree(). The return
   value, *error_pos and *reason are those of encode_locale_ex(). */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
{
    return encode_locale_ex(text, str, error_pos, reason, 1,
                            current_locale, surrogateescape);
}
729
Victor Stinner6672d0c2010-10-07 22:53:43 +0000730
Steve Dowerf2f373f2015-02-21 08:44:05 -0800731#ifdef MS_WINDOWS
732static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
733
734static void
735FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
736{
737 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
738 /* Cannot simply cast and dereference in_ptr,
739 since it might not be aligned properly */
740 __int64 in;
741 memcpy(&in, in_ptr, sizeof(in));
742 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
743 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
744}
745
746void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800747_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800748{
749 /* XXX endianness */
750 __int64 out;
751 out = time_in + secs_between_epochs;
752 out = out * 10000000 + nsec_in / 100;
753 memcpy(out_ptr, &out, sizeof(out));
754}
755
756/* Below, we *know* that ugo+r is 0444 */
757#if _S_IREAD != 0400
758#error Unsupported C library
759#endif
760static int
761attributes_to_mode(DWORD attr)
762{
763 int m = 0;
764 if (attr & FILE_ATTRIBUTE_DIRECTORY)
765 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
766 else
767 m |= _S_IFREG;
768 if (attr & FILE_ATTRIBUTE_READONLY)
769 m |= 0444;
770 else
771 m |= 0666;
772 return m;
773}
774
Steve Dowerbf1f3762015-02-21 15:26:02 -0800775void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200776_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
777 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800778{
779 memset(result, 0, sizeof(*result));
780 result->st_mode = attributes_to_mode(info->dwFileAttributes);
781 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
782 result->st_dev = info->dwVolumeSerialNumber;
783 result->st_rdev = result->st_dev;
784 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
785 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
786 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
787 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100788 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800789 if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
790 /* first clear the S_IFMT bits */
791 result->st_mode ^= (result->st_mode & S_IFMT);
792 /* now set the bits that make this a symlink */
793 result->st_mode |= S_IFLNK;
794 }
795 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800796}
797#endif
798
/* Fill *status with information about the file open on descriptor fd.

   POSIX: call fstat().

   Windows: use GetFileType() and GetFileInformationByHandle(), which support
   files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see
   issue #23152.

   Return 0 on success and nonzero on error. On error, the last Windows error
   is set on Windows and errno is set on POSIX. No exception is raised. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifndef MS_WINDOWS
    return fstat(fd, status);
#else
    BY_HANDLE_FILE_INFORMATION file_info;
    HANDLE handle;
    int file_type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* _get_osfhandle() already set errno; also set the Win32 error for
           callers who look at that instead */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    file_type = GetFileType(handle);
    switch (file_type) {
    case FILE_TYPE_DISK:
        /* regular file: query the full attributes below */
        break;

    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;

    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;

    case FILE_TYPE_UNKNOWN: {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* valid but unknown file: report a zeroed status */
        return 0;
    }

    default:
        /* any other non-disk type: report a zeroed status */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &file_info)) {
        /* The Win32 error is already set; also set errno for callers who
           expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&file_info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)file_info.nFileIndexHigh) << 32)
                     + file_info.nFileIndexLow;
    return 0;
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800864
Victor Stinnere134a7f2015-03-30 10:09:31 +0200865/* Return information about a file.
866
867 On POSIX, use fstat().
868
869 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -0800870 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
871 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +0200872 #23152.
873
874 Raise an exception and return -1 on error. On Windows, set the last Windows
875 error on error. On POSIX, set errno on error. Fill status and return 0 on
876 success.
877
Victor Stinner6f4fae82015-04-01 18:34:32 +0200878 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
879 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +0200880int
881_Py_fstat(int fd, struct _Py_stat_struct *status)
882{
883 int res;
884
Victor Stinner8a1be612016-03-14 22:07:55 +0100885 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +0100886
Victor Stinnere134a7f2015-03-30 10:09:31 +0200887 Py_BEGIN_ALLOW_THREADS
888 res = _Py_fstat_noraise(fd, status);
889 Py_END_ALLOW_THREADS
890
891 if (res != 0) {
892#ifdef MS_WINDOWS
893 PyErr_SetFromWindowsErr(0);
894#else
895 PyErr_SetFromErrno(PyExc_OSError);
896#endif
897 return -1;
898 }
899 return 0;
900}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800901
Victor Stinner6672d0c2010-10-07 22:53:43 +0000902/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
903 call stat() otherwise. Only fill st_mode attribute on Windows.
904
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100905 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
906 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000907
908int
Victor Stinnera4a75952010-10-07 22:23:10 +0000909_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000910{
911#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000912 int err;
913 struct _stat wstatbuf;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300914 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000915
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300916 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +0100917 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100918 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300919
Victor Stinneree587ea2011-11-17 00:51:38 +0100920 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000921 if (!err)
922 statbuf->st_mode = wstatbuf.st_mode;
923 return err;
924#else
925 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300926 PyObject *bytes;
927 char *cpath;
928
929 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000930 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100931 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300932
933 /* check for embedded null bytes */
934 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
935 Py_DECREF(bytes);
936 return -2;
937 }
938
939 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000940 Py_DECREF(bytes);
941 return ret;
942#endif
943}
944
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100945
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -0800946/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +0200947static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200948get_inheritable(int fd, int raise)
949{
950#ifdef MS_WINDOWS
951 HANDLE handle;
952 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000953
Steve Dower8fc89802015-04-12 00:26:27 -0400954 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200955 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400956 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200957 if (handle == INVALID_HANDLE_VALUE) {
958 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -0700959 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200960 return -1;
961 }
962
963 if (!GetHandleInformation(handle, &flags)) {
964 if (raise)
965 PyErr_SetFromWindowsErr(0);
966 return -1;
967 }
968
969 return (flags & HANDLE_FLAG_INHERIT);
970#else
971 int flags;
972
973 flags = fcntl(fd, F_GETFD, 0);
974 if (flags == -1) {
975 if (raise)
976 PyErr_SetFromErrno(PyExc_OSError);
977 return -1;
978 }
979 return !(flags & FD_CLOEXEC);
980#endif
981}
982
/* Public wrapper around get_inheritable() that always raises on error.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
991
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -0800992
993/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +0200994static int
995set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
996{
997#ifdef MS_WINDOWS
998 HANDLE handle;
999 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001000#else
1001#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1002 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001003 int request;
1004 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001005#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001006 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001007 int res;
1008#endif
1009
1010 /* atomic_flag_works can only be used to make the file descriptor
1011 non-inheritable */
1012 assert(!(atomic_flag_works != NULL && inheritable));
1013
1014 if (atomic_flag_works != NULL && !inheritable) {
1015 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001016 int isInheritable = get_inheritable(fd, raise);
1017 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001018 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001019 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001020 }
1021
1022 if (*atomic_flag_works)
1023 return 0;
1024 }
1025
1026#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001027 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001028 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001029 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001030 if (handle == INVALID_HANDLE_VALUE) {
1031 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001032 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001033 return -1;
1034 }
1035
1036 if (inheritable)
1037 flags = HANDLE_FLAG_INHERIT;
1038 else
1039 flags = 0;
1040 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1041 if (raise)
1042 PyErr_SetFromWindowsErr(0);
1043 return -1;
1044 }
1045 return 0;
1046
Victor Stinnerdaf45552013-08-28 00:53:59 +02001047#else
Victor Stinner282124b2014-09-02 11:41:04 +02001048
1049#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -08001050 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001051 /* fast-path: ioctl() only requires one syscall */
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -08001052 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1053 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001054 if (inheritable)
1055 request = FIONCLEX;
1056 else
1057 request = FIOCLEX;
1058 err = ioctl(fd, request, NULL);
1059 if (!err) {
1060 ioctl_works = 1;
1061 return 0;
1062 }
1063
Victor Stinner3116cc42016-05-19 16:46:18 +02001064 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001065 if (raise)
1066 PyErr_SetFromErrno(PyExc_OSError);
1067 return -1;
1068 }
1069 else {
1070 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1071 device". The ioctl is declared but not supported by the kernel.
1072 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001073 Illumos-based OS for example.
1074
1075 Issue #27057: When SELinux policy disallows ioctl it will fail
1076 with EACCES. While FIOCLEX is safe operation it may be
1077 unavailable because ioctl was denied altogether.
1078 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001079 ioctl_works = 0;
1080 }
1081 /* fallback to fcntl() if ioctl() does not work */
1082 }
1083#endif
1084
1085 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001086 flags = fcntl(fd, F_GETFD);
1087 if (flags < 0) {
1088 if (raise)
1089 PyErr_SetFromErrno(PyExc_OSError);
1090 return -1;
1091 }
1092
Victor Stinnera858bbd2016-04-17 16:51:52 +02001093 if (inheritable) {
1094 new_flags = flags & ~FD_CLOEXEC;
1095 }
1096 else {
1097 new_flags = flags | FD_CLOEXEC;
1098 }
1099
1100 if (new_flags == flags) {
1101 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1102 return 0;
1103 }
1104
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001105 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001106 if (res < 0) {
1107 if (raise)
1108 PyErr_SetFromErrno(PyExc_OSError);
1109 return -1;
1110 }
1111 return 0;
1112#endif
1113}
1114
/* Clear the inheritable flag of fd without raising a Python exception.

   Return 0 on success; on error, return -1 with errno set. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1122
/* Set the inheritable flag of the specified file descriptor.

   Return 0 on success. On error, raise an exception and return -1.

   atomic_flag_works may point to a cached tri-state describing whether the
   atomic flag used to create the descriptor is honoured:

   * *atomic_flag_works == -1: look at the descriptor. If it is already
     non-inheritable, store 1 and do nothing else; otherwise store 0 and
     make the descriptor non-inheritable
   * *atomic_flag_works == 1: do nothing
   * *atomic_flag_works == 0: make the descriptor non-inheritable

   Pass NULL if no atomic flag was used to create the file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1144
/* Variant of _Py_set_inheritable() that never raises: on error, errno is
   set and -1 is returned.

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1153
Victor Stinnera555cfc2015-03-18 00:22:14 +01001154static int
1155_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001156{
1157 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001158 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001159#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001160 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001161#endif
1162
1163#ifdef MS_WINDOWS
1164 flags |= O_NOINHERIT;
1165#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001166 atomic_flag_works = &_Py_open_cloexec_works;
1167 flags |= O_CLOEXEC;
1168#else
1169 atomic_flag_works = NULL;
1170#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001171
Victor Stinnera555cfc2015-03-18 00:22:14 +01001172 if (gil_held) {
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001173 do {
1174 Py_BEGIN_ALLOW_THREADS
1175 fd = open(pathname, flags);
1176 Py_END_ALLOW_THREADS
1177 } while (fd < 0
1178 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1179 if (async_err)
1180 return -1;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001181 if (fd < 0) {
1182 PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1183 return -1;
1184 }
1185 }
1186 else {
1187 fd = open(pathname, flags);
1188 if (fd < 0)
1189 return -1;
1190 }
1191
1192#ifndef MS_WINDOWS
1193 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001194 close(fd);
1195 return -1;
1196 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001197#endif
1198
Victor Stinnerdaf45552013-08-28 00:53:59 +02001199 return fd;
1200}
1201
/* Wrapper to open(): open pathname with the specified flags.

   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1219
/* Wrapper to open() that does not raise: open pathname with the specified
   flags.

   Return a file descriptor on success. On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1231
/* Open a file from a wide-character path and mode.

   Windows: call _wfopen(). Other platforms: convert mode with wcstombs(),
   encode path to the locale encoding and call fopen().

   Return NULL on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *stream;
#ifdef MS_WINDOWS
    stream = _wfopen(path, mode);
#else
    char *encoded_path;
    char narrow_mode[10];
    size_t mode_len;

    mode_len = wcstombs(narrow_mode, mode, 10);
    if (mode_len == (size_t)-1 || mode_len >= 10) {
        /* unencodable mode, or too long to be null terminated */
        errno = EINVAL;
        return NULL;
    }

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        return NULL;
    }
    stream = fopen(encoded_path, narrow_mode);
    PyMem_RawFree(encoded_path);
#endif
    if (stream == NULL) {
        return NULL;
    }

    if (make_non_inheritable(fileno(stream)) < 0) {
        fclose(stream);
        return NULL;
    }
    return stream;
}
1268
/* Wrapper to fopen() taking a byte-string path.

   Return NULL on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *stream;

    stream = fopen(pathname, mode);
    if (stream != NULL && make_non_inheritable(fileno(stream)) < 0) {
        /* could not clear the inheritable flag: give up on the stream */
        fclose(stream);
        stream = NULL;
    }
    return stream;
}
1286
1287/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001288 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001289
Victor Stinnere42ccd22015-03-18 01:39:23 +01001290 Return the new file object on success. Raise an exception and return NULL
1291 on error.
1292
1293 The file descriptor is created non-inheritable.
1294
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001295 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1296 except if the Python signal handler raises an exception.
1297
Victor Stinner6f4fae82015-04-01 18:34:32 +02001298 Release the GIL to call _wfopen() or fopen(). The caller must hold
1299 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001300FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001301_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001302{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001303 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001304 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001305#ifdef MS_WINDOWS
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001306 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001307 wchar_t wmode[10];
1308 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001309
Victor Stinnere42ccd22015-03-18 01:39:23 +01001310 assert(PyGILState_Check());
1311
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001312 if (!PyUnicode_Check(path)) {
1313 PyErr_Format(PyExc_TypeError,
1314 "str file path expected under Windows, got %R",
1315 Py_TYPE(path));
1316 return NULL;
1317 }
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001318 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001319 if (wpath == NULL)
1320 return NULL;
1321
Miss Islington (bot)ca82e3c2018-02-18 10:40:07 -08001322 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1323 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001324 if (usize == 0) {
1325 PyErr_SetFromWindowsErr(0);
Victor Stinner4e314432010-10-07 21:45:39 +00001326 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001327 }
Victor Stinner4e314432010-10-07 21:45:39 +00001328
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001329 do {
1330 Py_BEGIN_ALLOW_THREADS
1331 f = _wfopen(wpath, wmode);
1332 Py_END_ALLOW_THREADS
1333 } while (f == NULL
1334 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinner4e314432010-10-07 21:45:39 +00001335#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001336 PyObject *bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001337 char *path_bytes;
1338
1339 assert(PyGILState_Check());
1340
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001341 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001342 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001343 path_bytes = PyBytes_AS_STRING(bytes);
1344
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001345 do {
1346 Py_BEGIN_ALLOW_THREADS
1347 f = fopen(path_bytes, mode);
1348 Py_END_ALLOW_THREADS
1349 } while (f == NULL
1350 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001351
Victor Stinner4e314432010-10-07 21:45:39 +00001352 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001353#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001354 if (async_err)
1355 return NULL;
1356
Victor Stinnere42ccd22015-03-18 01:39:23 +01001357 if (f == NULL) {
1358 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001359 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001360 }
1361
1362 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001363 fclose(f);
1364 return NULL;
1365 }
1366 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001367}
1368
Victor Stinner66aab0c2015-03-19 22:53:20 +01001369/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001370
1371 On success, return the number of read bytes, it can be lower than count.
1372 If the current file offset is at or past the end of file, no bytes are read,
1373 and read() returns zero.
1374
1375 On error, raise an exception, set errno and return -1.
1376
1377 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1378 If the Python signal handler raises an exception, the function returns -1
1379 (the syscall is not retried).
1380
1381 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001382Py_ssize_t
1383_Py_read(int fd, void *buf, size_t count)
1384{
1385 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001386 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001387 int async_err = 0;
1388
Victor Stinner8a1be612016-03-14 22:07:55 +01001389 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001390
Victor Stinner66aab0c2015-03-19 22:53:20 +01001391 /* _Py_read() must not be called with an exception set, otherwise the
1392 * caller may think that read() was interrupted by a signal and the signal
1393 * handler raised an exception. */
1394 assert(!PyErr_Occurred());
1395
Miss Islington (bot)178d1c02018-10-17 23:58:40 -07001396 if (count > _PY_READ_MAX) {
1397 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001398 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001399
Steve Dower8fc89802015-04-12 00:26:27 -04001400 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001401 do {
1402 Py_BEGIN_ALLOW_THREADS
1403 errno = 0;
1404#ifdef MS_WINDOWS
1405 n = read(fd, buf, (int)count);
1406#else
1407 n = read(fd, buf, count);
1408#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001409 /* save/restore errno because PyErr_CheckSignals()
1410 * and PyErr_SetFromErrno() can modify it */
1411 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001412 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001413 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001414 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001415 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001416
1417 if (async_err) {
1418 /* read() was interrupted by a signal (failed with EINTR)
1419 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001420 errno = err;
1421 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001422 return -1;
1423 }
1424 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001425 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001426 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001427 return -1;
1428 }
1429
1430 return n;
1431}
1432
Victor Stinner82c3e452015-04-01 18:34:45 +02001433static Py_ssize_t
1434_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001435{
1436 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001437 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001438 int async_err = 0;
1439
Steve Dower8fc89802015-04-12 00:26:27 -04001440 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001441#ifdef MS_WINDOWS
1442 if (count > 32767 && isatty(fd)) {
1443 /* Issue #11395: the Windows console returns an error (12: not
1444 enough space error) on writing into stdout if stdout mode is
1445 binary and the length is greater than 66,000 bytes (or less,
1446 depending on heap usage). */
1447 count = 32767;
1448 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001449#endif
Miss Islington (bot)178d1c02018-10-17 23:58:40 -07001450 if (count > _PY_WRITE_MAX) {
1451 count = _PY_WRITE_MAX;
1452 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001453
Victor Stinner82c3e452015-04-01 18:34:45 +02001454 if (gil_held) {
1455 do {
1456 Py_BEGIN_ALLOW_THREADS
1457 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001458#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001459 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001460#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001461 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001462#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001463 /* save/restore errno because PyErr_CheckSignals()
1464 * and PyErr_SetFromErrno() can modify it */
1465 err = errno;
1466 Py_END_ALLOW_THREADS
1467 } while (n < 0 && err == EINTR &&
1468 !(async_err = PyErr_CheckSignals()));
1469 }
1470 else {
1471 do {
1472 errno = 0;
1473#ifdef MS_WINDOWS
1474 n = write(fd, buf, (int)count);
1475#else
1476 n = write(fd, buf, count);
1477#endif
1478 err = errno;
1479 } while (n < 0 && err == EINTR);
1480 }
Steve Dower8fc89802015-04-12 00:26:27 -04001481 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001482
1483 if (async_err) {
1484 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001485 and the Python signal handler raised an exception (if gil_held is
1486 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001487 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001488 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001489 return -1;
1490 }
1491 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001492 if (gil_held)
1493 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001494 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001495 return -1;
1496 }
1497
1498 return n;
1499}
1500
Victor Stinner82c3e452015-04-01 18:34:45 +02001501/* Write count bytes of buf into fd.
1502
1503 On success, return the number of written bytes, it can be lower than count
1504 including 0. On error, raise an exception, set errno and return -1.
1505
1506 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1507 If the Python signal handler raises an exception, the function returns -1
1508 (the syscall is not retried).
1509
1510 Release the GIL to call write(). The caller must hold the GIL. */
1511Py_ssize_t
1512_Py_write(int fd, const void *buf, size_t count)
1513{
Victor Stinner8a1be612016-03-14 22:07:55 +01001514 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001515
Victor Stinner82c3e452015-04-01 18:34:45 +02001516 /* _Py_write() must not be called with an exception set, otherwise the
1517 * caller may think that write() was interrupted by a signal and the signal
1518 * handler raised an exception. */
1519 assert(!PyErr_Occurred());
1520
1521 return _Py_write_impl(fd, buf, count, 1);
1522}
1523
1524/* Write count bytes of buf into fd.
1525 *
1526 * On success, return the number of written bytes, it can be lower than count
1527 * including 0. On error, set errno and return -1.
1528 *
1529 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1530 * without calling the Python signal handler. */
1531Py_ssize_t
1532_Py_write_noraise(int fd, const void *buf, size_t count)
1533{
1534 return _Py_write_impl(fd, buf, count, 0);
1535}
1536
#ifdef HAVE_READLINK

/* Read the value of a symbolic link.

   path is encoded to the locale encoding before calling readlink(); the
   link target is decoded from the locale encoding into buf, which has room
   for bufsiz wide characters.

   Return the length of the decoded target on success. Return -1 on error,
   with errno set to EINVAL for every failure except a failing readlink()
   call. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char target[MAXPATHLEN];
    char *encoded_path;
    wchar_t *decoded;
    size_t decoded_len;
    int nbytes;

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    nbytes = (int)readlink(encoded_path, target, Py_ARRAY_LENGTH(target));
    PyMem_RawFree(encoded_path);
    if (nbytes == -1) {
        return -1;
    }
    if (nbytes == Py_ARRAY_LENGTH(target)) {
        /* target filled completely: no room left for the terminator */
        errno = EINVAL;
        return -1;
    }
    /* readlink() does not null terminate its output */
    target[nbytes] = '\0';

    decoded = Py_DecodeLocale(target, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    if (bufsiz <= decoded_len) {
        /* buf cannot hold the decoded target plus its terminator */
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return -1;
    }

    wcsncpy(buf, decoded, bufsiz);
    PyMem_RawFree(decoded);
    return (int)decoded_len;
}
#endif
1580
1581#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001582
1583/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001584 encoding, decode the result from the locale encoding.
1585 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001586
Victor Stinner4e314432010-10-07 21:45:39 +00001587wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001588_Py_wrealpath(const wchar_t *path,
1589 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +00001590{
1591 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001592 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001593 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001594 char *res;
1595 size_t r;
Victor Stinner9dd76202017-12-21 16:20:32 +01001596 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001597 if (cpath == NULL) {
1598 errno = EINVAL;
1599 return NULL;
1600 }
1601 res = realpath(cpath, cresolved_path);
Victor Stinner9dd76202017-12-21 16:20:32 +01001602 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001603 if (res == NULL)
1604 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001605
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001606 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001607 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001608 errno = EINVAL;
1609 return NULL;
1610 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001611 if (resolved_path_size <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001612 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001613 errno = EINVAL;
1614 return NULL;
1615 }
1616 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001617 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001618 return resolved_path;
1619}
1620#endif
1621
Victor Stinnerf4061da2010-10-14 12:37:19 +00001622/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001623 including the null character. Decode the path from the locale encoding.
1624 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001625
Victor Stinner4e314432010-10-07 21:45:39 +00001626wchar_t*
1627_Py_wgetcwd(wchar_t *buf, size_t size)
1628{
1629#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +02001630 int isize = (int)Py_MIN(size, INT_MAX);
1631 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +00001632#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001633 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001634 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001635 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001636
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001637 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001638 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001639 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001640 if (wname == NULL)
1641 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +00001642 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001643 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001644 return NULL;
1645 }
Victor Stinnerf4061da2010-10-14 12:37:19 +00001646 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001647 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001648 return buf;
1649#endif
1650}
1651
Victor Stinnerdaf45552013-08-28 00:53:59 +02001652/* Duplicate a file descriptor. The new file descriptor is created as
1653 non-inheritable. Return a new file descriptor on success, raise an OSError
1654 exception and return -1 on error.
1655
1656 The GIL is released to call dup(). The caller must hold the GIL. */
1657int
1658_Py_dup(int fd)
1659{
1660#ifdef MS_WINDOWS
1661 HANDLE handle;
1662 DWORD ftype;
1663#endif
1664
Victor Stinner8a1be612016-03-14 22:07:55 +01001665 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001666
Victor Stinnerdaf45552013-08-28 00:53:59 +02001667#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001668 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001669 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001670 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001671 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001672 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001673 return -1;
1674 }
1675
1676 /* get the file type, ignore the error if it failed */
1677 ftype = GetFileType(handle);
1678
1679 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001680 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001681 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001682 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001683 Py_END_ALLOW_THREADS
1684 if (fd < 0) {
1685 PyErr_SetFromErrno(PyExc_OSError);
1686 return -1;
1687 }
1688
1689 /* Character files like console cannot be make non-inheritable */
1690 if (ftype != FILE_TYPE_CHAR) {
1691 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001692 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001693 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001694 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001695 return -1;
1696 }
1697 }
1698#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1699 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001700 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001701 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001702 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001703 Py_END_ALLOW_THREADS
1704 if (fd < 0) {
1705 PyErr_SetFromErrno(PyExc_OSError);
1706 return -1;
1707 }
1708
1709#else
1710 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001711 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001712 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001713 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001714 Py_END_ALLOW_THREADS
1715 if (fd < 0) {
1716 PyErr_SetFromErrno(PyExc_OSError);
1717 return -1;
1718 }
1719
1720 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001721 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001722 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001723 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001724 return -1;
1725 }
1726#endif
1727 return fd;
1728}
1729
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001730#ifndef MS_WINDOWS
1731/* Get the blocking mode of the file descriptor.
1732 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1733 raise an exception and return -1 on error. */
1734int
1735_Py_get_blocking(int fd)
1736{
Steve Dower8fc89802015-04-12 00:26:27 -04001737 int flags;
1738 _Py_BEGIN_SUPPRESS_IPH
1739 flags = fcntl(fd, F_GETFL, 0);
1740 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001741 if (flags < 0) {
1742 PyErr_SetFromErrno(PyExc_OSError);
1743 return -1;
1744 }
1745
1746 return !(flags & O_NONBLOCK);
1747}
1748
1749/* Set the blocking mode of the specified file descriptor.
1750
1751 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1752 otherwise.
1753
1754 Return 0 on success, raise an exception and return -1 on error. */
1755int
1756_Py_set_blocking(int fd, int blocking)
1757{
1758#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1759 int arg = !blocking;
1760 if (ioctl(fd, FIONBIO, &arg) < 0)
1761 goto error;
1762#else
1763 int flags, res;
1764
Steve Dower8fc89802015-04-12 00:26:27 -04001765 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001766 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001767 if (flags >= 0) {
1768 if (blocking)
1769 flags = flags & (~O_NONBLOCK);
1770 else
1771 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001772
Steve Dower8fc89802015-04-12 00:26:27 -04001773 res = fcntl(fd, F_SETFL, flags);
1774 } else {
1775 res = -1;
1776 }
1777 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001778
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001779 if (res < 0)
1780 goto error;
1781#endif
1782 return 0;
1783
1784error:
1785 PyErr_SetFromErrno(PyExc_OSError);
1786 return -1;
1787}
1788#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01001789
1790
1791int
1792_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
1793 const char **grouping)
1794{
1795 int res = -1;
1796
1797 struct lconv *lc = localeconv();
1798
1799 int change_locale = 0;
1800 if (decimal_point != NULL &&
1801 (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
1802 {
1803 change_locale = 1;
1804 }
1805 if (thousands_sep != NULL &&
1806 (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
1807 {
1808 change_locale = 1;
1809 }
1810
1811 /* Keep a copy of the LC_CTYPE locale */
1812 char *oldloc = NULL, *loc = NULL;
1813 if (change_locale) {
1814 oldloc = setlocale(LC_CTYPE, NULL);
1815 if (!oldloc) {
Victor Stinner6eff6b82018-11-20 22:06:21 +01001816 PyErr_SetString(PyExc_RuntimeWarning, "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01001817 return -1;
1818 }
1819
1820 oldloc = _PyMem_Strdup(oldloc);
1821 if (!oldloc) {
1822 PyErr_NoMemory();
1823 return -1;
1824 }
1825
1826 loc = setlocale(LC_NUMERIC, NULL);
1827 if (loc != NULL && strcmp(loc, oldloc) == 0) {
1828 loc = NULL;
1829 }
1830
1831 if (loc != NULL) {
Victor Stinner6eff6b82018-11-20 22:06:21 +01001832 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01001833 if LC_NUMERIC locale is different than LC_CTYPE locale and
1834 decimal_point and/or thousands_sep are non-ASCII or longer than
1835 1 byte */
1836 setlocale(LC_CTYPE, loc);
1837 }
1838 }
1839
1840 if (decimal_point != NULL) {
1841 *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1842 if (*decimal_point == NULL) {
1843 goto error;
1844 }
1845 }
1846 if (thousands_sep != NULL) {
1847 *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1848 if (*thousands_sep == NULL) {
1849 goto error;
1850 }
1851 }
1852
1853 if (grouping != NULL) {
1854 *grouping = lc->grouping;
1855 }
1856
1857 res = 0;
1858
1859error:
1860 if (loc != NULL) {
1861 setlocale(LC_CTYPE, oldloc);
1862 }
1863 PyMem_Free(oldloc);
1864 return res;
1865}