blob: b77e490ce2363b4fe33edfd2fb8ca5e791d9994f [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08006# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00007# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -04008extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +00009#endif
Victor Stinner4e314432010-10-07 21:45:39 +000010
Brett Cannonefb00c02012-02-29 18:31:31 -050011#ifdef HAVE_LANGINFO_H
12#include <langinfo.h>
13#endif
14
Victor Stinnerdaf45552013-08-28 00:53:59 +020015#ifdef HAVE_SYS_IOCTL_H
16#include <sys/ioctl.h>
17#endif
18
19#ifdef HAVE_FCNTL_H
20#include <fcntl.h>
21#endif /* HAVE_FCNTL_H */
22
#ifdef O_CLOEXEC
/* Tri-state flag recording whether open() honours O_CLOEXEC:

     -1: not known yet
      0: open() ignores the O_CLOEXEC flag (for example Linux kernels
         older than 2.6.23)
      1: open() supports the O_CLOEXEC flag, close-on-exec is set

   Consulted by _Py_open(), _Py_open_noraise(), io.FileIO and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
34
Brett Cannonefb00c02012-02-29 18:31:31 -050035PyObject *
36_Py_device_encoding(int fd)
37{
Victor Stinner14b9b112013-06-25 00:37:25 +020038#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050039 UINT cp;
40#endif
Steve Dower8fc89802015-04-12 00:26:27 -040041 int valid;
42 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070043 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040044 _Py_END_SUPPRESS_IPH
45 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050046 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040047
Victor Stinner14b9b112013-06-25 00:37:25 +020048#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050049 if (fd == 0)
50 cp = GetConsoleCP();
51 else if (fd == 1 || fd == 2)
52 cp = GetConsoleOutputCP();
53 else
54 cp = 0;
55 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
56 has no console */
57 if (cp != 0)
58 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
59#elif defined(CODESET)
60 {
61 char *codeset = nl_langinfo(CODESET);
62 if (codeset != NULL && codeset[0] != 0)
63 return PyUnicode_FromString(codeset);
64 }
65#endif
66 Py_RETURN_NONE;
67}
68
Victor Stinner7ed7aea2018-01-15 10:45:49 +010069#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
70
71#define USE_FORCE_ASCII
72
Victor Stinnerd45c7f82012-12-04 01:34:47 +010073extern int _Py_normalize_encoding(const char *, char *, size_t);
74
/* Workaround for a locale encoding issue of FreeBSD and OpenIndiana with the
   C locale.

   On these systems nl_langinfo(CODESET) announces an alias of the ASCII
   encoding, whereas mbstowcs() and wcstombs() actually use ISO-8859-1.
   os.fsencode() and os.fsdecode() use the locale.getpreferredencoding()
   codec, so for example command line arguments decoded by mbstowcs() and
   encoded back by os.fsencode() raise UnicodeEncodeError instead of giving
   back the original byte string.

   The workaround is enabled when all of the following hold:
     - setlocale(LC_CTYPE, NULL) returns "C" (or "POSIX"),
     - nl_langinfo(CODESET) announces "ascii" or one of its aliases,
     - at least one byte in range 0x80-0xff can be decoded from the locale
       encoding.
   It is also enabled on error, for example if getting the locale failed.

   Values of force_ascii:

      1: the workaround is used: encoding goes through encode_ascii() and
         decoding through decode_ascii()
      0: the workaround is not used: the current locale functions
         (wcstombs() / mbstowcs()) are used
     -1: unknown, check_force_ascii() must be called to get the value
*/
static int force_ascii = -1;
99
/* Decide whether the ASCII workaround must be used for the locale encoding.

   Return 1 if ASCII must be forced, 0 otherwise:
     - 0 if the LC_CTYPE locale is neither "C" nor "POSIX";
     - 0 if nl_langinfo(CODESET) is not "ascii" or one of its aliases;
     - 1 if the codeset claims to be ASCII but mbstowcs() is able to decode
       at least one byte in range 0x80-0xff (the C library lies);
     - 0 if none of these bytes can be decoded (really ASCII);
     - 1 on any error (locale or codeset unavailable, normalization failed)
       and when nl_langinfo(CODESET) is not available at all. */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    /* "ascii" followed by its aliases from Lib/encodings/aliases.py */
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };
    const char *codeset;
    const char * const *alias;
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    int is_ascii;
    unsigned int i;
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the one-byte probe so the C library never reads past it. */
        char probe[2];
        wchar_t wch;
        size_t res;

        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        res = mbstowcs(&wch, probe, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
182
Victor Stinner21220bb2018-10-30 12:59:20 +0100183
184int
185_Py_GetForceASCII(void)
186{
187 if (force_ascii == -1) {
188 force_ascii = check_force_ascii();
189 }
190 return force_ascii;
191}
192
193
194
/* Encode the wide character string 'text' to ASCII.

   Characters <= 0x7f are copied as is. If 'surrogateescape' is non-zero,
   characters in range U+DC80..U+DCFF are written as bytes 0x80..0xff. Any
   other character is an encoding error.

   The output buffer is allocated with PyMem_RawMalloc() if 'raw_malloc' is
   non-zero, with PyMem_Malloc() otherwise.

   Return 0 on success and store the null-terminated byte string into *str.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the buffer is released, the index of the
   offending character is written into *error_pos (if not NULL) and a message
   into *reason (if not NULL). */
static int
encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *buffer;
    size_t pos;

    /* exactly one byte per character, +1 for the null byte */
    if (raw_malloc) {
        buffer = PyMem_RawMalloc(nchars + 1);
    }
    else {
        buffer = PyMem_Malloc(nchars + 1);
    }
    if (buffer == NULL) {
        return -1;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (surrogateescape && 0xdc80 <= wc && wc <= 0xdcff) {
            /* UTF-8b surrogate */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* character which cannot be encoded: give up */
        if (raw_malloc) {
            PyMem_RawFree(buffer);
        }
        else {
            PyMem_Free(buffer);
        }
        if (error_pos != NULL) {
            *error_pos = pos;
        }
        if (reason) {
            *reason = "encoding error";
        }
        return -2;
    }

    buffer[nchars] = '\0';
    *str = buffer;
    return 0;
}
Victor Stinner7d35f792018-10-30 14:32:01 +0100249#else
/* Variant used when the ASCII workaround is not compiled in (USE_FORCE_ASCII
   is not defined on this platform): ASCII is never forced. */
int
_Py_GetForceASCII(void)
{
    const int never_forced = 0;
    return never_forced;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100255#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100256
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100257
258#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
259static int
260decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
261 const char **reason, int surrogateescape)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100262{
263 wchar_t *res;
264 unsigned char *in;
265 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600266 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100267
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100268 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
269 return -1;
270 }
271 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
272 if (!res) {
273 return -1;
274 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100275
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100276 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100277 for (in = (unsigned char*)arg; *in; in++) {
278 unsigned char ch = *in;
279 if (ch < 128) {
280 *out++ = ch;
281 }
282 else {
283 if (!surrogateescape) {
284 PyMem_RawFree(res);
285 if (wlen) {
286 *wlen = in - (unsigned char*)arg;
287 }
288 if (reason) {
289 *reason = "decoding error";
290 }
291 return -2;
292 }
293 *out++ = 0xdc00 + ch;
294 }
295 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100296 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100297
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100298 if (wlen != NULL) {
299 *wlen = out - res;
300 }
301 *wstr = res;
302 return 0;
303}
304#endif /* !HAVE_MBRTOWC */
305
306static int
307decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
308 const char **reason, int surrogateescape)
Victor Stinner4e314432010-10-07 21:45:39 +0000309{
310 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100311 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000312 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200313#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000314 unsigned char *in;
315 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000316 mbstate_t mbs;
317#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100318
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100319#ifdef HAVE_BROKEN_MBSTOWCS
320 /* Some platforms have a broken implementation of
321 * mbstowcs which does not count the characters that
322 * would result from conversion. Use an upper bound.
323 */
324 argsize = strlen(arg);
325#else
326 argsize = mbstowcs(NULL, arg, 0);
327#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000328 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100329 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
330 return -1;
331 }
332 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
333 if (!res) {
334 return -1;
335 }
336
337 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000338 if (count != (size_t)-1) {
339 wchar_t *tmp;
340 /* Only use the result if it contains no
341 surrogate characters. */
342 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100343 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000344 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000345 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100346 if (wlen != NULL) {
347 *wlen = count;
348 }
349 *wstr = res;
350 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000351 }
Victor Stinner4e314432010-10-07 21:45:39 +0000352 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200353 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000354 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100355
Victor Stinner4e314432010-10-07 21:45:39 +0000356 /* Conversion failed. Fall back to escaping with surrogateescape. */
357#ifdef HAVE_MBRTOWC
358 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
359
360 /* Overallocate; as multi-byte characters are in the argument, the
361 actual output could use less memory. */
362 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100363 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
364 return -1;
365 }
366 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
367 if (!res) {
368 return -1;
369 }
370
Victor Stinner4e314432010-10-07 21:45:39 +0000371 in = (unsigned char*)arg;
372 out = res;
373 memset(&mbs, 0, sizeof mbs);
374 while (argsize) {
375 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100376 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000377 /* Reached end of string; null char stored. */
378 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100379 }
380
Victor Stinner4e314432010-10-07 21:45:39 +0000381 if (converted == (size_t)-2) {
382 /* Incomplete character. This should never happen,
383 since we provide everything that we have -
384 unless there is a bug in the C library, or I
385 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100386 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000387 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100388
Victor Stinner4e314432010-10-07 21:45:39 +0000389 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100390 if (!surrogateescape) {
391 goto decode_error;
392 }
393
Victor Stinner4e314432010-10-07 21:45:39 +0000394 /* Conversion error. Escape as UTF-8b, and start over
395 in the initial shift state. */
396 *out++ = 0xdc00 + *in++;
397 argsize--;
398 memset(&mbs, 0, sizeof mbs);
399 continue;
400 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100401
Victor Stinner76df43d2012-10-30 01:42:39 +0100402 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100403 if (!surrogateescape) {
404 goto decode_error;
405 }
406
Victor Stinner4e314432010-10-07 21:45:39 +0000407 /* Surrogate character. Escape the original
408 byte sequence with surrogateescape. */
409 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100410 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000411 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100412 }
Victor Stinner4e314432010-10-07 21:45:39 +0000413 continue;
414 }
415 /* successfully converted some bytes */
416 in += converted;
417 argsize -= converted;
418 out++;
419 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100420 if (wlen != NULL) {
421 *wlen = out - res;
422 }
423 *wstr = res;
424 return 0;
425
426decode_error:
427 PyMem_RawFree(res);
428 if (wlen) {
429 *wlen = in - (unsigned char*)arg;
430 }
431 if (reason) {
432 *reason = "decoding error";
433 }
434 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100435#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000436 /* Cannot use C locale for escaping; manually escape as if charset
437 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
438 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100439 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinnere2623772012-11-12 23:04:02 +0100440#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100441}
442
443
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100444/* Decode a byte string from the locale encoding.
445
446 Use the strict error handler if 'surrogateescape' is zero. Use the
447 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
448 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
449 can be decoded as a surrogate character, escape the bytes using the
450 surrogateescape error handler instead of decoding them.
451
Miss Islington (bot)32955292018-04-20 14:00:41 -0700452 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100453 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
454 the number of wide characters excluding the null character into *wlen.
455
456 On memory allocation failure, return -1.
457
458 On decoding error, return -2. If wlen is not NULL, write the start of
459 invalid byte sequence in the input string into *wlen. If reason is not NULL,
460 write the decoding error message into *reason.
461
462 Use the Py_EncodeLocaleEx() function to encode the character string back to
463 a byte string. */
464int
465_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
466 const char **reason,
467 int current_locale, int surrogateescape)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100468{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100469 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100470#ifdef __ANDROID__
471 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
472 surrogateescape);
473#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100474 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100475#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100476 }
477
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100478#if defined(__APPLE__) || defined(__ANDROID__)
479 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
480 surrogateescape);
481#else
482 if (Py_UTF8Mode == 1) {
483 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
484 surrogateescape);
485 }
486
487#ifdef USE_FORCE_ASCII
488 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100489 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100490 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100491
492 if (force_ascii) {
493 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100494 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100495 }
496#endif
497
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100498 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100499#endif /* __APPLE__ or __ANDROID__ */
500}
501
502
/* Decode a byte string from the locale encoding with the surrogateescape
   error handler: undecodable bytes are decoded as characters in range
   U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, the bytes are escaped using the surrogateescape error handler
   instead of being decoded.

   Return a pointer to a newly allocated wide character string; use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number
   of wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or to (size_t)-2 on
   decoding error.

   Decoding errors should never happen, unless there is a bug in the C
   library.

   Use the Py_EncodeLocale() function to encode the character string back to
   a byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *wlen)
{
    wchar_t *decoded;
    /* current_locale=0, surrogateescape=1, no error message wanted */
    const int status = _Py_DecodeLocaleEx(arg, &decoded, wlen, NULL, 0, 1);

    if (status == 0) {
        return decoded;
    }
    /* status is -1 or -2: expose it through *wlen */
    if (wlen != NULL) {
        *wlen = (size_t)status;
    }
    return NULL;
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100535
Victor Stinner91106cd2017-12-13 12:29:09 +0100536
/* Encode the wide character string 'text' to the encoding of the current
   locale, one character at a time with wcstombs().

   Characters in range U+DC80..U+DCFF are written as bytes 0x80..0xff if
   'surrogateescape' is non-zero, and are an encoding error otherwise.

   The output buffer is allocated with PyMem_RawMalloc() if 'raw_malloc' is
   non-zero, with PyMem_Malloc() otherwise.

   Return 0 on success and store the null-terminated byte string into *str.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the buffer is released, the index of the
   offending character is written into *error_pos (if not NULL) and a message
   into *reason (if not NULL). */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, int surrogateescape)
{
    const size_t len = wcslen(text);
    char *result = NULL;
    char *cursor = NULL;     /* NULL during the sizing pass */
    size_t i, size, nbytes;
    wchar_t wc, one_char[2];

    /* The same loop body runs twice:
       1. cursor == NULL: compute the length of the output in bytes (size)
       2. cursor != NULL: output the bytes; size is then the room left */
    size = 0;
    one_char[1] = 0;
    for (;;) {
        for (i = 0; i < len; i++) {
            wc = text[i];
            if (0xdc80 <= wc && wc <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate: exactly one byte */
                if (cursor == NULL) {
                    size++;
                }
                else {
                    *cursor++ = wc - 0xdc00;
                    size--;
                }
                continue;
            }

            one_char[0] = wc;
            if (cursor == NULL) {
                nbytes = wcstombs(NULL, one_char, 0);
            }
            else {
                nbytes = wcstombs(cursor, one_char, size);
            }
            if (nbytes == (size_t)-1) {
                goto encode_error;
            }
            if (cursor == NULL) {
                size += nbytes;
            }
            else {
                cursor += nbytes;
                size -= nbytes;
            }
        }
        if (result != NULL) {
            /* second pass done */
            *cursor = '\0';
            break;
        }

        size += 1;   /* nul byte at the end */
        if (raw_malloc) {
            result = PyMem_RawMalloc(size);
        }
        else {
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
            return -1;
        }
        cursor = result;
    }
    *str = result;
    return 0;

encode_error:
    /* result is still NULL if the error occurred during the sizing pass */
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100624
625static int
626encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
627 const char **reason,
628 int raw_malloc, int current_locale, int surrogateescape)
629{
630 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100631#ifdef __ANDROID__
632 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
633 raw_malloc, surrogateescape);
634#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100635 return encode_current_locale(text, str, error_pos, reason,
636 raw_malloc, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100637#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100638 }
639
640#if defined(__APPLE__) || defined(__ANDROID__)
641 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
642 raw_malloc, surrogateescape);
643#else /* __APPLE__ */
644 if (Py_UTF8Mode == 1) {
645 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
646 raw_malloc, surrogateescape);
647 }
648
649#ifdef USE_FORCE_ASCII
650 if (force_ascii == -1) {
651 force_ascii = check_force_ascii();
652 }
653
654 if (force_ascii) {
655 return encode_ascii(text, str, error_pos, reason,
656 raw_malloc, surrogateescape);
657 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100658#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100659
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100660 return encode_current_locale(text, str, error_pos, reason,
661 raw_malloc, surrogateescape);
662#endif /* __APPLE__ or __ANDROID__ */
663}
664
/* Encode 'text' with the surrogateescape error handler, using
   encode_locale_ex().

   Return the encoded byte string on success, NULL on encoding error or
   memory allocation failure.

   If error_pos is not NULL, *error_pos keeps the index written by the
   encoder on encoding error (-2), and is set to (size_t)-1 in every other
   case (success and memory allocation failure). */
static char*
encode_locale(const wchar_t *text, size_t *error_pos,
              int raw_malloc, int current_locale)
{
    char *encoded;
    const int status = encode_locale_ex(text, &encoded, error_pos, NULL,
                                        raw_malloc, current_locale, 1);

    if (error_pos != NULL && status != -2) {
        *error_pos = (size_t)-1;
    }
    return (status == 0) ? encoded : NULL;
}
680
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string; use PyMem_Free() to
   free the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, and to (size_t)-1 otherwise.

   Use the Py_DecodeLocale() function to decode the bytes string back to a
   wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100698
Victor Stinner91106cd2017-12-13 12:29:09 +0100699
/* Same as Py_EncodeLocale(), except that the result is allocated with the
   raw allocator: it must be freed with PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
707
708
/* Encode a wide character string with encode_locale_ex(), always using the
   raw memory allocator: on success (return value 0), *str must be freed with
   PyMem_RawFree().

   'error_pos', 'reason', 'current_locale' and 'surrogateescape' are passed
   unchanged to encode_locale_ex(), whose return value is returned. */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
{
    const int raw_malloc = 1;

    return encode_locale_ex(text, str, error_pos, reason, raw_malloc,
                            current_locale, surrogateescape);
}
717
Victor Stinner6672d0c2010-10-07 22:53:43 +0000718
Steve Dowerf2f373f2015-02-21 08:44:05 -0800719#ifdef MS_WINDOWS
720static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
721
722static void
723FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
724{
725 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
726 /* Cannot simply cast and dereference in_ptr,
727 since it might not be aligned properly */
728 __int64 in;
729 memcpy(&in, in_ptr, sizeof(in));
730 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
731 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
732}
733
734void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800735_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800736{
737 /* XXX endianness */
738 __int64 out;
739 out = time_in + secs_between_epochs;
740 out = out * 10000000 + nsec_in / 100;
741 memcpy(out_ptr, &out, sizeof(out));
742}
743
744/* Below, we *know* that ugo+r is 0444 */
745#if _S_IREAD != 0400
746#error Unsupported C library
747#endif
748static int
749attributes_to_mode(DWORD attr)
750{
751 int m = 0;
752 if (attr & FILE_ATTRIBUTE_DIRECTORY)
753 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
754 else
755 m |= _S_IFREG;
756 if (attr & FILE_ATTRIBUTE_READONLY)
757 m |= 0444;
758 else
759 m |= 0666;
760 return m;
761}
762
Steve Dowerbf1f3762015-02-21 15:26:02 -0800763void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200764_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
765 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800766{
767 memset(result, 0, sizeof(*result));
768 result->st_mode = attributes_to_mode(info->dwFileAttributes);
769 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
770 result->st_dev = info->dwVolumeSerialNumber;
771 result->st_rdev = result->st_dev;
772 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
773 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
774 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
775 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100776 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800777 if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
778 /* first clear the S_IFMT bits */
779 result->st_mode ^= (result->st_mode & S_IFMT);
780 /* now set the bits that make this a symlink */
781 result->st_mode |= S_IFLNK;
782 }
783 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800784}
785#endif
786
/* Return information about the file referred to by the descriptor 'fd'.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which
   support files larger than 2 GiB. fstat() may fail with EOVERFLOW on files
   larger than 2 GiB because the file size type is a signed 32-bit integer:
   see issue #23152. For a handle which is not a disk file, only st_mode is
   filled (_S_IFCHR for a character device, _S_IFIFO for a pipe, left zeroed
   for anything else).

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE handle;
    int type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    type = GetFileType(handle);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (type) {
    case FILE_TYPE_DISK:
        /* regular case, handled below */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        /* status stays zeroed */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800852
Victor Stinnere134a7f2015-03-30 10:09:31 +0200853/* Return information about a file.
854
855 On POSIX, use fstat().
856
857 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -0800858 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
859 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +0200860 #23152.
861
862 Raise an exception and return -1 on error. On Windows, set the last Windows
863 error on error. On POSIX, set errno on error. Fill status and return 0 on
864 success.
865
Victor Stinner6f4fae82015-04-01 18:34:32 +0200866 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
867 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +0200868int
869_Py_fstat(int fd, struct _Py_stat_struct *status)
870{
871 int res;
872
Victor Stinner8a1be612016-03-14 22:07:55 +0100873 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +0100874
Victor Stinnere134a7f2015-03-30 10:09:31 +0200875 Py_BEGIN_ALLOW_THREADS
876 res = _Py_fstat_noraise(fd, status);
877 Py_END_ALLOW_THREADS
878
879 if (res != 0) {
880#ifdef MS_WINDOWS
881 PyErr_SetFromWindowsErr(0);
882#else
883 PyErr_SetFromErrno(PyExc_OSError);
884#endif
885 return -1;
886 }
887 return 0;
888}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800889
Victor Stinner6672d0c2010-10-07 22:53:43 +0000890/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
891 call stat() otherwise. Only fill st_mode attribute on Windows.
892
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100893 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
894 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000895
896int
Victor Stinnera4a75952010-10-07 22:23:10 +0000897_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000898{
899#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000900 int err;
901 struct _stat wstatbuf;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300902 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000903
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300904 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +0100905 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100906 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300907
Victor Stinneree587ea2011-11-17 00:51:38 +0100908 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000909 if (!err)
910 statbuf->st_mode = wstatbuf.st_mode;
911 return err;
912#else
913 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300914 PyObject *bytes;
915 char *cpath;
916
917 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000918 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100919 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300920
921 /* check for embedded null bytes */
922 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
923 Py_DECREF(bytes);
924 return -2;
925 }
926
927 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000928 Py_DECREF(bytes);
929 return ret;
930#endif
931}
932
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100933
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -0800934/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +0200935static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200936get_inheritable(int fd, int raise)
937{
938#ifdef MS_WINDOWS
939 HANDLE handle;
940 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000941
Steve Dower8fc89802015-04-12 00:26:27 -0400942 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200943 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400944 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200945 if (handle == INVALID_HANDLE_VALUE) {
946 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -0700947 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200948 return -1;
949 }
950
951 if (!GetHandleInformation(handle, &flags)) {
952 if (raise)
953 PyErr_SetFromWindowsErr(0);
954 return -1;
955 }
956
957 return (flags & HANDLE_FLAG_INHERIT);
958#else
959 int flags;
960
961 flags = fcntl(fd, F_GETFD, 0);
962 if (flags == -1) {
963 if (raise)
964 PyErr_SetFromErrno(PyExc_OSError);
965 return -1;
966 }
967 return !(flags & FD_CLOEXEC);
968#endif
969}
970
/* Report the inheritable flag of the file descriptor fd.
   Return 1 if the descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
979
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -0800980
981/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +0200982static int
983set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
984{
985#ifdef MS_WINDOWS
986 HANDLE handle;
987 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +0200988#else
989#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
990 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200991 int request;
992 int err;
Victor Stinner282124b2014-09-02 11:41:04 +0200993#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +0200994 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200995 int res;
996#endif
997
998 /* atomic_flag_works can only be used to make the file descriptor
999 non-inheritable */
1000 assert(!(atomic_flag_works != NULL && inheritable));
1001
1002 if (atomic_flag_works != NULL && !inheritable) {
1003 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001004 int isInheritable = get_inheritable(fd, raise);
1005 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001006 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001007 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001008 }
1009
1010 if (*atomic_flag_works)
1011 return 0;
1012 }
1013
1014#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001015 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001016 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001017 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001018 if (handle == INVALID_HANDLE_VALUE) {
1019 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001020 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001021 return -1;
1022 }
1023
1024 if (inheritable)
1025 flags = HANDLE_FLAG_INHERIT;
1026 else
1027 flags = 0;
1028 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1029 if (raise)
1030 PyErr_SetFromWindowsErr(0);
1031 return -1;
1032 }
1033 return 0;
1034
Victor Stinnerdaf45552013-08-28 00:53:59 +02001035#else
Victor Stinner282124b2014-09-02 11:41:04 +02001036
1037#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -08001038 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001039 /* fast-path: ioctl() only requires one syscall */
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -08001040 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1041 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001042 if (inheritable)
1043 request = FIONCLEX;
1044 else
1045 request = FIOCLEX;
1046 err = ioctl(fd, request, NULL);
1047 if (!err) {
1048 ioctl_works = 1;
1049 return 0;
1050 }
1051
Victor Stinner3116cc42016-05-19 16:46:18 +02001052 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001053 if (raise)
1054 PyErr_SetFromErrno(PyExc_OSError);
1055 return -1;
1056 }
1057 else {
1058 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1059 device". The ioctl is declared but not supported by the kernel.
1060 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001061 Illumos-based OS for example.
1062
1063 Issue #27057: When SELinux policy disallows ioctl it will fail
1064 with EACCES. While FIOCLEX is safe operation it may be
1065 unavailable because ioctl was denied altogether.
1066 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001067 ioctl_works = 0;
1068 }
1069 /* fallback to fcntl() if ioctl() does not work */
1070 }
1071#endif
1072
1073 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001074 flags = fcntl(fd, F_GETFD);
1075 if (flags < 0) {
1076 if (raise)
1077 PyErr_SetFromErrno(PyExc_OSError);
1078 return -1;
1079 }
1080
Victor Stinnera858bbd2016-04-17 16:51:52 +02001081 if (inheritable) {
1082 new_flags = flags & ~FD_CLOEXEC;
1083 }
1084 else {
1085 new_flags = flags | FD_CLOEXEC;
1086 }
1087
1088 if (new_flags == flags) {
1089 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1090 return 0;
1091 }
1092
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001093 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001094 if (res < 0) {
1095 if (raise)
1096 PyErr_SetFromErrno(PyExc_OSError);
1097 return -1;
1098 }
1099 return 0;
1100#endif
1101}
1102
/* Clear the inheritable flag of the file descriptor fd without ever raising
   a Python exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1110
/* Change the inheritable flag of the file descriptor fd.
   Return 0 on success; on error, raise an exception and return -1.

   atomic_flag_works is an optional cache recording whether the flag used to
   create the descriptor atomically non-inheritable really works. Pass NULL
   if no atomic flag was used to create the file descriptor. Otherwise:

   * *atomic_flag_works==-1 (unknown): read the current inheritable flag of
     fd. If fd is already non-inheritable, store 1 and do nothing more; if fd
     is still inheritable, store 0 and clear the inheritable flag
   * *atomic_flag_works==1: do nothing
   * *atomic_flag_works==0: clear the inheritable flag

   The cache can only be used to make a file descriptor non-inheritable:
   atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1132
/* Variant of _Py_set_inheritable() that never raises an exception: on error
   it sets errno and returns -1.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1141
Victor Stinnera555cfc2015-03-18 00:22:14 +01001142static int
1143_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001144{
1145 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001146 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001147#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001148 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001149#endif
1150
1151#ifdef MS_WINDOWS
1152 flags |= O_NOINHERIT;
1153#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001154 atomic_flag_works = &_Py_open_cloexec_works;
1155 flags |= O_CLOEXEC;
1156#else
1157 atomic_flag_works = NULL;
1158#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001159
Victor Stinnera555cfc2015-03-18 00:22:14 +01001160 if (gil_held) {
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001161 do {
1162 Py_BEGIN_ALLOW_THREADS
1163 fd = open(pathname, flags);
1164 Py_END_ALLOW_THREADS
1165 } while (fd < 0
1166 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1167 if (async_err)
1168 return -1;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001169 if (fd < 0) {
1170 PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1171 return -1;
1172 }
1173 }
1174 else {
1175 fd = open(pathname, flags);
1176 if (fd < 0)
1177 return -1;
1178 }
1179
1180#ifndef MS_WINDOWS
1181 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001182 close(fd);
1183 return -1;
1184 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001185#endif
1186
Victor Stinnerdaf45552013-08-28 00:53:59 +02001187 return fd;
1188}
1189
/* Wrapper around open(): open pathname with the specified flags.
   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the call is
   retried unless the Python signal handler raises an exception.

   The GIL is released while open() runs. The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1207
/* Wrapper around open(): open pathname with the specified flags.
   Return a file descriptor on success. On error, set errno and return -1;
   no exception is raised.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1219
/* Open a file from a wide-character path and mode. Windows: use _wfopen().
   Elsewhere: encode the path to the locale encoding and use fopen().

   Return the stream, or NULL on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *stream;
#ifdef MS_WINDOWS
    stream = _wfopen(path, mode);
#else
    char mode_bytes[10];
    char *path_bytes;
    size_t mode_len;

    mode_len = wcstombs(mode_bytes, mode, sizeof(mode_bytes));
    if (mode_len == (size_t)-1 || mode_len >= sizeof(mode_bytes)) {
        /* mode cannot be converted or does not fit in the buffer */
        errno = EINVAL;
        return NULL;
    }

    path_bytes = _Py_EncodeLocaleRaw(path, NULL);
    if (path_bytes == NULL) {
        return NULL;
    }
    stream = fopen(path_bytes, mode_bytes);
    PyMem_RawFree(path_bytes);
#endif
    if (stream == NULL) {
        return NULL;
    }
    if (make_non_inheritable(fileno(stream)) < 0) {
        fclose(stream);
        return NULL;
    }
    return stream;
}
1256
/* Wrapper around fopen(). Return the stream, or NULL on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *stream = fopen(pathname, mode);

    if (stream != NULL && make_non_inheritable(fileno(stream)) < 0) {
        /* could not drop inheritance: do not hand out the stream */
        fclose(stream);
        stream = NULL;
    }
    return stream;
}
1274
1275/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001276 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001277
Victor Stinnere42ccd22015-03-18 01:39:23 +01001278 Return the new file object on success. Raise an exception and return NULL
1279 on error.
1280
1281 The file descriptor is created non-inheritable.
1282
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001283 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1284 except if the Python signal handler raises an exception.
1285
Victor Stinner6f4fae82015-04-01 18:34:32 +02001286 Release the GIL to call _wfopen() or fopen(). The caller must hold
1287 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001288FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001289_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001290{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001291 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001292 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001293#ifdef MS_WINDOWS
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001294 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001295 wchar_t wmode[10];
1296 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001297
Victor Stinnere42ccd22015-03-18 01:39:23 +01001298 assert(PyGILState_Check());
1299
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001300 if (!PyUnicode_Check(path)) {
1301 PyErr_Format(PyExc_TypeError,
1302 "str file path expected under Windows, got %R",
1303 Py_TYPE(path));
1304 return NULL;
1305 }
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001306 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001307 if (wpath == NULL)
1308 return NULL;
1309
Miss Islington (bot)ca82e3c2018-02-18 10:40:07 -08001310 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1311 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001312 if (usize == 0) {
1313 PyErr_SetFromWindowsErr(0);
Victor Stinner4e314432010-10-07 21:45:39 +00001314 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001315 }
Victor Stinner4e314432010-10-07 21:45:39 +00001316
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001317 do {
1318 Py_BEGIN_ALLOW_THREADS
1319 f = _wfopen(wpath, wmode);
1320 Py_END_ALLOW_THREADS
1321 } while (f == NULL
1322 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinner4e314432010-10-07 21:45:39 +00001323#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001324 PyObject *bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001325 char *path_bytes;
1326
1327 assert(PyGILState_Check());
1328
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001329 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001330 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001331 path_bytes = PyBytes_AS_STRING(bytes);
1332
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001333 do {
1334 Py_BEGIN_ALLOW_THREADS
1335 f = fopen(path_bytes, mode);
1336 Py_END_ALLOW_THREADS
1337 } while (f == NULL
1338 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001339
Victor Stinner4e314432010-10-07 21:45:39 +00001340 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001341#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001342 if (async_err)
1343 return NULL;
1344
Victor Stinnere42ccd22015-03-18 01:39:23 +01001345 if (f == NULL) {
1346 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001347 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001348 }
1349
1350 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001351 fclose(f);
1352 return NULL;
1353 }
1354 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001355}
1356
Victor Stinner66aab0c2015-03-19 22:53:20 +01001357/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001358
1359 On success, return the number of read bytes, it can be lower than count.
1360 If the current file offset is at or past the end of file, no bytes are read,
1361 and read() returns zero.
1362
1363 On error, raise an exception, set errno and return -1.
1364
1365 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1366 If the Python signal handler raises an exception, the function returns -1
1367 (the syscall is not retried).
1368
1369 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001370Py_ssize_t
1371_Py_read(int fd, void *buf, size_t count)
1372{
1373 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001374 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001375 int async_err = 0;
1376
Victor Stinner8a1be612016-03-14 22:07:55 +01001377 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001378
Victor Stinner66aab0c2015-03-19 22:53:20 +01001379 /* _Py_read() must not be called with an exception set, otherwise the
1380 * caller may think that read() was interrupted by a signal and the signal
1381 * handler raised an exception. */
1382 assert(!PyErr_Occurred());
1383
Miss Islington (bot)178d1c02018-10-17 23:58:40 -07001384 if (count > _PY_READ_MAX) {
1385 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001386 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001387
Steve Dower8fc89802015-04-12 00:26:27 -04001388 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001389 do {
1390 Py_BEGIN_ALLOW_THREADS
1391 errno = 0;
1392#ifdef MS_WINDOWS
1393 n = read(fd, buf, (int)count);
1394#else
1395 n = read(fd, buf, count);
1396#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001397 /* save/restore errno because PyErr_CheckSignals()
1398 * and PyErr_SetFromErrno() can modify it */
1399 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001400 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001401 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001402 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001403 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001404
1405 if (async_err) {
1406 /* read() was interrupted by a signal (failed with EINTR)
1407 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001408 errno = err;
1409 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001410 return -1;
1411 }
1412 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001413 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001414 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001415 return -1;
1416 }
1417
1418 return n;
1419}
1420
Victor Stinner82c3e452015-04-01 18:34:45 +02001421static Py_ssize_t
1422_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001423{
1424 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001425 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001426 int async_err = 0;
1427
Steve Dower8fc89802015-04-12 00:26:27 -04001428 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001429#ifdef MS_WINDOWS
1430 if (count > 32767 && isatty(fd)) {
1431 /* Issue #11395: the Windows console returns an error (12: not
1432 enough space error) on writing into stdout if stdout mode is
1433 binary and the length is greater than 66,000 bytes (or less,
1434 depending on heap usage). */
1435 count = 32767;
1436 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001437#endif
Miss Islington (bot)178d1c02018-10-17 23:58:40 -07001438 if (count > _PY_WRITE_MAX) {
1439 count = _PY_WRITE_MAX;
1440 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001441
Victor Stinner82c3e452015-04-01 18:34:45 +02001442 if (gil_held) {
1443 do {
1444 Py_BEGIN_ALLOW_THREADS
1445 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001446#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001447 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001448#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001449 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001450#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001451 /* save/restore errno because PyErr_CheckSignals()
1452 * and PyErr_SetFromErrno() can modify it */
1453 err = errno;
1454 Py_END_ALLOW_THREADS
1455 } while (n < 0 && err == EINTR &&
1456 !(async_err = PyErr_CheckSignals()));
1457 }
1458 else {
1459 do {
1460 errno = 0;
1461#ifdef MS_WINDOWS
1462 n = write(fd, buf, (int)count);
1463#else
1464 n = write(fd, buf, count);
1465#endif
1466 err = errno;
1467 } while (n < 0 && err == EINTR);
1468 }
Steve Dower8fc89802015-04-12 00:26:27 -04001469 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001470
1471 if (async_err) {
1472 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001473 and the Python signal handler raised an exception (if gil_held is
1474 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001475 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001476 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001477 return -1;
1478 }
1479 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001480 if (gil_held)
1481 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001482 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001483 return -1;
1484 }
1485
1486 return n;
1487}
1488
Victor Stinner82c3e452015-04-01 18:34:45 +02001489/* Write count bytes of buf into fd.
1490
1491 On success, return the number of written bytes, it can be lower than count
1492 including 0. On error, raise an exception, set errno and return -1.
1493
1494 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1495 If the Python signal handler raises an exception, the function returns -1
1496 (the syscall is not retried).
1497
1498 Release the GIL to call write(). The caller must hold the GIL. */
1499Py_ssize_t
1500_Py_write(int fd, const void *buf, size_t count)
1501{
Victor Stinner8a1be612016-03-14 22:07:55 +01001502 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001503
Victor Stinner82c3e452015-04-01 18:34:45 +02001504 /* _Py_write() must not be called with an exception set, otherwise the
1505 * caller may think that write() was interrupted by a signal and the signal
1506 * handler raised an exception. */
1507 assert(!PyErr_Occurred());
1508
1509 return _Py_write_impl(fd, buf, count, 1);
1510}
1511
1512/* Write count bytes of buf into fd.
1513 *
1514 * On success, return the number of written bytes, it can be lower than count
1515 * including 0. On error, set errno and return -1.
1516 *
1517 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1518 * without calling the Python signal handler. */
1519Py_ssize_t
1520_Py_write_noraise(int fd, const void *buf, size_t count)
1521{
1522 return _Py_write_impl(fd, buf, count, 0);
1523}
1524
#ifdef HAVE_READLINK

/* Read the target of the symbolic link path into buf, a buffer of bufsiz
   wide characters. The path is encoded to the locale encoding for readlink()
   and the result is decoded from the locale encoding.

   Return the length of the target in wide characters (buf is null
   terminated), or -1 on error with errno set. */

int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char link_bytes[MAXPATHLEN];
    char *path_bytes;
    wchar_t *decoded;
    size_t decoded_len;
    int nbytes;

    path_bytes = _Py_EncodeLocaleRaw(path, NULL);
    if (path_bytes == NULL) {
        errno = EINVAL;
        return -1;
    }

    nbytes = (int)readlink(path_bytes, link_bytes,
                           Py_ARRAY_LENGTH(link_bytes));
    PyMem_RawFree(path_bytes);
    if (nbytes == -1) {
        return -1;
    }
    if (nbytes == Py_ARRAY_LENGTH(link_bytes)) {
        /* buffer completely filled: no room left for the terminator */
        errno = EINVAL;
        return -1;
    }
    link_bytes[nbytes] = '\0'; /* readlink() does not null terminate */

    decoded = Py_DecodeLocale(link_bytes, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    if (bufsiz <= decoded_len) {
        /* the caller's buffer cannot hold the target plus its terminator */
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return -1;
    }
    wcsncpy(buf, decoded, bufsiz);
    PyMem_RawFree(decoded);
    return (int)decoded_len;
}
#endif
1568
#ifdef HAVE_REALPATH

/* Compute the canonicalized absolute pathname of path and store it in
   resolved_path, a buffer of resolved_path_size wide characters. The path is
   encoded to the locale encoding for realpath() and the result is decoded
   from the locale encoding.

   Return resolved_path on success, NULL on error. */

wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_size)
{
    char resolved_bytes[MAXPATHLEN];
    char *path_bytes;
    char *outcome;
    wchar_t *decoded;
    size_t decoded_len;

    path_bytes = _Py_EncodeLocaleRaw(path, NULL);
    if (path_bytes == NULL) {
        errno = EINVAL;
        return NULL;
    }
    outcome = realpath(path_bytes, resolved_bytes);
    PyMem_RawFree(path_bytes);
    if (outcome == NULL) {
        return NULL;
    }

    decoded = Py_DecodeLocale(resolved_bytes, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }
    if (resolved_path_size <= decoded_len) {
        /* the caller's buffer cannot hold the result plus its terminator */
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return NULL;
    }
    wcsncpy(resolved_path, decoded, resolved_path_size);
    PyMem_RawFree(decoded);
    return resolved_path;
}
#endif
1609
Victor Stinnerf4061da2010-10-14 12:37:19 +00001610/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001611 including the null character. Decode the path from the locale encoding.
1612 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001613
Victor Stinner4e314432010-10-07 21:45:39 +00001614wchar_t*
1615_Py_wgetcwd(wchar_t *buf, size_t size)
1616{
1617#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +02001618 int isize = (int)Py_MIN(size, INT_MAX);
1619 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +00001620#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001621 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001622 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001623 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001624
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001625 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001626 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001627 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001628 if (wname == NULL)
1629 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +00001630 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001631 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001632 return NULL;
1633 }
Victor Stinnerf4061da2010-10-14 12:37:19 +00001634 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001635 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001636 return buf;
1637#endif
1638}
1639
Victor Stinnerdaf45552013-08-28 00:53:59 +02001640/* Duplicate a file descriptor. The new file descriptor is created as
1641 non-inheritable. Return a new file descriptor on success, raise an OSError
1642 exception and return -1 on error.
1643
1644 The GIL is released to call dup(). The caller must hold the GIL. */
1645int
1646_Py_dup(int fd)
1647{
1648#ifdef MS_WINDOWS
1649 HANDLE handle;
1650 DWORD ftype;
1651#endif
1652
Victor Stinner8a1be612016-03-14 22:07:55 +01001653 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001654
Victor Stinnerdaf45552013-08-28 00:53:59 +02001655#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001656 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001657 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001658 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001659 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001660 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001661 return -1;
1662 }
1663
1664 /* get the file type, ignore the error if it failed */
1665 ftype = GetFileType(handle);
1666
1667 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001668 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001669 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001670 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001671 Py_END_ALLOW_THREADS
1672 if (fd < 0) {
1673 PyErr_SetFromErrno(PyExc_OSError);
1674 return -1;
1675 }
1676
1677 /* Character files like console cannot be make non-inheritable */
1678 if (ftype != FILE_TYPE_CHAR) {
1679 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001680 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001681 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001682 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001683 return -1;
1684 }
1685 }
1686#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1687 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001688 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001689 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001690 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001691 Py_END_ALLOW_THREADS
1692 if (fd < 0) {
1693 PyErr_SetFromErrno(PyExc_OSError);
1694 return -1;
1695 }
1696
1697#else
1698 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001699 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001700 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001701 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001702 Py_END_ALLOW_THREADS
1703 if (fd < 0) {
1704 PyErr_SetFromErrno(PyExc_OSError);
1705 return -1;
1706 }
1707
1708 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001709 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001710 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001711 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001712 return -1;
1713 }
1714#endif
1715 return fd;
1716}
1717
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001718#ifndef MS_WINDOWS
1719/* Get the blocking mode of the file descriptor.
1720 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1721 raise an exception and return -1 on error. */
1722int
1723_Py_get_blocking(int fd)
1724{
Steve Dower8fc89802015-04-12 00:26:27 -04001725 int flags;
1726 _Py_BEGIN_SUPPRESS_IPH
1727 flags = fcntl(fd, F_GETFL, 0);
1728 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001729 if (flags < 0) {
1730 PyErr_SetFromErrno(PyExc_OSError);
1731 return -1;
1732 }
1733
1734 return !(flags & O_NONBLOCK);
1735}
1736
1737/* Set the blocking mode of the specified file descriptor.
1738
1739 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1740 otherwise.
1741
1742 Return 0 on success, raise an exception and return -1 on error. */
1743int
1744_Py_set_blocking(int fd, int blocking)
1745{
1746#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1747 int arg = !blocking;
1748 if (ioctl(fd, FIONBIO, &arg) < 0)
1749 goto error;
1750#else
1751 int flags, res;
1752
Steve Dower8fc89802015-04-12 00:26:27 -04001753 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001754 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001755 if (flags >= 0) {
1756 if (blocking)
1757 flags = flags & (~O_NONBLOCK);
1758 else
1759 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001760
Steve Dower8fc89802015-04-12 00:26:27 -04001761 res = fcntl(fd, F_SETFL, flags);
1762 } else {
1763 res = -1;
1764 }
1765 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001766
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001767 if (res < 0)
1768 goto error;
1769#endif
1770 return 0;
1771
1772error:
1773 PyErr_SetFromErrno(PyExc_OSError);
1774 return -1;
1775}
1776#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01001777
1778
1779int
1780_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
1781 const char **grouping)
1782{
1783 int res = -1;
1784
1785 struct lconv *lc = localeconv();
1786
1787 int change_locale = 0;
1788 if (decimal_point != NULL &&
1789 (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
1790 {
1791 change_locale = 1;
1792 }
1793 if (thousands_sep != NULL &&
1794 (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
1795 {
1796 change_locale = 1;
1797 }
1798
1799 /* Keep a copy of the LC_CTYPE locale */
1800 char *oldloc = NULL, *loc = NULL;
1801 if (change_locale) {
1802 oldloc = setlocale(LC_CTYPE, NULL);
1803 if (!oldloc) {
Victor Stinner6eff6b82018-11-20 22:06:21 +01001804 PyErr_SetString(PyExc_RuntimeWarning, "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01001805 return -1;
1806 }
1807
1808 oldloc = _PyMem_Strdup(oldloc);
1809 if (!oldloc) {
1810 PyErr_NoMemory();
1811 return -1;
1812 }
1813
1814 loc = setlocale(LC_NUMERIC, NULL);
1815 if (loc != NULL && strcmp(loc, oldloc) == 0) {
1816 loc = NULL;
1817 }
1818
1819 if (loc != NULL) {
Victor Stinner6eff6b82018-11-20 22:06:21 +01001820 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01001821 if LC_NUMERIC locale is different than LC_CTYPE locale and
1822 decimal_point and/or thousands_sep are non-ASCII or longer than
1823 1 byte */
1824 setlocale(LC_CTYPE, loc);
1825 }
1826 }
1827
1828 if (decimal_point != NULL) {
1829 *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1830 if (*decimal_point == NULL) {
1831 goto error;
1832 }
1833 }
1834 if (thousands_sep != NULL) {
1835 *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1836 if (*thousands_sep == NULL) {
1837 goto error;
1838 }
1839 }
1840
1841 if (grouping != NULL) {
1842 *grouping = lc->grouping;
1843 }
1844
1845 res = 0;
1846
1847error:
1848 if (loc != NULL) {
1849 setlocale(LC_CTYPE, oldloc);
1850 }
1851 PyMem_Free(oldloc);
1852 return res;
1853}