blob: 1b7e6697c74aba565f216f79d044c3b61b3933b5 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08006# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00007# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -04008extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +00009#endif
Victor Stinner4e314432010-10-07 21:45:39 +000010
Brett Cannonefb00c02012-02-29 18:31:31 -050011#ifdef HAVE_LANGINFO_H
12#include <langinfo.h>
13#endif
14
Victor Stinnerdaf45552013-08-28 00:53:59 +020015#ifdef HAVE_SYS_IOCTL_H
16#include <sys/ioctl.h>
17#endif
18
19#ifdef HAVE_FCNTL_H
20#include <fcntl.h>
21#endif /* HAVE_FCNTL_H */
22
Victor Stinnerdaf45552013-08-28 00:53:59 +020023#ifdef O_CLOEXEC
Victor Stinnerb034eee2013-09-07 10:36:04 +020024/* Does open() support the O_CLOEXEC flag? Possible values:
Victor Stinnerdaf45552013-08-28 00:53:59 +020025
26 -1: unknown
27 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
28 1: open() supports O_CLOEXEC flag, close-on-exec is set
29
Victor Stinnera555cfc2015-03-18 00:22:14 +010030 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
31 and os.open(). */
Victor Stinnerdaf45552013-08-28 00:53:59 +020032int _Py_open_cloexec_works = -1;
33#endif
34
Brett Cannonefb00c02012-02-29 18:31:31 -050035PyObject *
36_Py_device_encoding(int fd)
37{
Victor Stinner14b9b112013-06-25 00:37:25 +020038#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050039 UINT cp;
40#endif
Steve Dower8fc89802015-04-12 00:26:27 -040041 int valid;
42 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070043 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040044 _Py_END_SUPPRESS_IPH
45 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050046 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040047
Victor Stinner14b9b112013-06-25 00:37:25 +020048#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050049 if (fd == 0)
50 cp = GetConsoleCP();
51 else if (fd == 1 || fd == 2)
52 cp = GetConsoleOutputCP();
53 else
54 cp = 0;
55 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
56 has no console */
57 if (cp != 0)
58 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
59#elif defined(CODESET)
60 {
61 char *codeset = nl_langinfo(CODESET);
62 if (codeset != NULL && codeset[0] != 0)
63 return PyUnicode_FromString(codeset);
64 }
65#endif
66 Py_RETURN_NONE;
67}
68
Victor Stinner7ed7aea2018-01-15 10:45:49 +010069#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
70
71#define USE_FORCE_ASCII
72
Victor Stinnerd45c7f82012-12-04 01:34:47 +010073extern int _Py_normalize_encoding(const char *, char *, size_t);
74
75/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
76 On these operating systems, nl_langinfo(CODESET) announces an alias of the
77 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
78 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
79 locale.getpreferredencoding() codec. For example, if command line arguments
80 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
81 UnicodeEncodeError instead of retrieving the original byte string.
82
83 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
84 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
85 one byte in range 0x80-0xff can be decoded from the locale encoding. The
86 workaround is also enabled on error, for example if getting the locale
87 failed.
88
Philip Jenvey215c49a2013-01-15 13:24:12 -080089 Values of force_ascii:
Victor Stinnerd45c7f82012-12-04 01:34:47 +010090
   1: the workaround is used: Py_EncodeLocale() uses
      encode_ascii() and Py_DecodeLocale() uses
      decode_ascii()
Victor Stinnerf6a271a2014-08-01 12:28:48 +020094 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
95 Py_DecodeLocale() uses mbstowcs()
Victor Stinnerd45c7f82012-12-04 01:34:47 +010096 -1: unknown, need to call check_force_ascii() to get the value
97*/
98static int force_ascii = -1;
99
/* Decide whether the "force ASCII" workaround must be used for the current
   LC_CTYPE locale.

   Return 0 if the LC_CTYPE locale is neither "C" nor "POSIX", if
   nl_langinfo(CODESET) does not announce ASCII (or one of its aliases), or
   if no byte in the range 0x80-0xff can be decoded by mbstowcs() (the locale
   encoding really is ASCII).

   Return 1 if at least one byte in the range 0x80-0xff can be decoded by
   mbstowcs() although the locale announces ASCII, if nl_langinfo(CODESET)
   is not available, or if an error occurred while querying the locale. */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset;
    const char * const *alias;
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    int is_ascii;
    unsigned int i;
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included ('i' is an unsigned int, so
       the inclusive bound cannot wrap around). */
    for (i = 0x80; i <= 0xff; i++) {
        char probe[2];
        wchar_t wch;
        size_t res;

        /* NUL-terminate the probe so that mbstowcs() never reads past the
           single byte being tested. */
        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        res = mbstowcs(&wch, probe, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
182
Victor Stinner21220bb2018-10-30 12:59:20 +0100183
184int
185_Py_GetForceASCII(void)
186{
187 if (force_ascii == -1) {
188 force_ascii = check_force_ascii();
189 }
190 return force_ascii;
191}
192
193
194
/* Encode the wide character string 'text' to ASCII.

   Characters <= 0x7f are copied as a single byte. If 'surrogateescape' is
   non-zero, characters in range U+DC80..U+DCFF are written as bytes
   0x80..0xff. Any other character is an encoding error.

   The buffer is allocated with PyMem_RawMalloc() if 'raw_malloc' is
   non-zero, with PyMem_Malloc() otherwise.

   Return 0 on success and store the null-terminated buffer into *str.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the buffer is released, the index of the
   offending character is written into *error_pos (if not NULL) and the
   message "encoding error" into *reason (if not NULL). */
static int
encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *buffer;
    size_t pos;

    /* one output byte per input character, +1 for the null byte */
    buffer = raw_malloc ? PyMem_RawMalloc(nchars + 1)
                        : PyMem_Malloc(nchars + 1);
    if (buffer == NULL) {
        return -1;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (surrogateescape && wc >= 0xdc80 && wc <= 0xdcff) {
            /* UTF-8b surrogate */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* character which cannot be encoded */
        if (raw_malloc) {
            PyMem_RawFree(buffer);
        }
        else {
            PyMem_Free(buffer);
        }
        if (error_pos != NULL) {
            *error_pos = pos;
        }
        if (reason) {
            *reason = "encoding error";
        }
        return -2;
    }

    buffer[nchars] = '\0';
    *str = buffer;
    return 0;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100249#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100250
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100251
252#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
253static int
254decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
255 const char **reason, int surrogateescape)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100256{
257 wchar_t *res;
258 unsigned char *in;
259 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600260 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100261
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100262 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
263 return -1;
264 }
265 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
266 if (!res) {
267 return -1;
268 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100269
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100270 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100271 for (in = (unsigned char*)arg; *in; in++) {
272 unsigned char ch = *in;
273 if (ch < 128) {
274 *out++ = ch;
275 }
276 else {
277 if (!surrogateescape) {
278 PyMem_RawFree(res);
279 if (wlen) {
280 *wlen = in - (unsigned char*)arg;
281 }
282 if (reason) {
283 *reason = "decoding error";
284 }
285 return -2;
286 }
287 *out++ = 0xdc00 + ch;
288 }
289 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100290 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100291
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100292 if (wlen != NULL) {
293 *wlen = out - res;
294 }
295 *wstr = res;
296 return 0;
297}
298#endif /* !HAVE_MBRTOWC */
299
300static int
301decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
302 const char **reason, int surrogateescape)
Victor Stinner4e314432010-10-07 21:45:39 +0000303{
304 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100305 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000306 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200307#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000308 unsigned char *in;
309 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000310 mbstate_t mbs;
311#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100312
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100313#ifdef HAVE_BROKEN_MBSTOWCS
314 /* Some platforms have a broken implementation of
315 * mbstowcs which does not count the characters that
316 * would result from conversion. Use an upper bound.
317 */
318 argsize = strlen(arg);
319#else
320 argsize = mbstowcs(NULL, arg, 0);
321#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000322 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100323 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
324 return -1;
325 }
326 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
327 if (!res) {
328 return -1;
329 }
330
331 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000332 if (count != (size_t)-1) {
333 wchar_t *tmp;
334 /* Only use the result if it contains no
335 surrogate characters. */
336 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100337 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000338 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000339 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100340 if (wlen != NULL) {
341 *wlen = count;
342 }
343 *wstr = res;
344 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000345 }
Victor Stinner4e314432010-10-07 21:45:39 +0000346 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200347 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000348 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100349
Victor Stinner4e314432010-10-07 21:45:39 +0000350 /* Conversion failed. Fall back to escaping with surrogateescape. */
351#ifdef HAVE_MBRTOWC
352 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
353
354 /* Overallocate; as multi-byte characters are in the argument, the
355 actual output could use less memory. */
356 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100357 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
358 return -1;
359 }
360 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
361 if (!res) {
362 return -1;
363 }
364
Victor Stinner4e314432010-10-07 21:45:39 +0000365 in = (unsigned char*)arg;
366 out = res;
367 memset(&mbs, 0, sizeof mbs);
368 while (argsize) {
369 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100370 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000371 /* Reached end of string; null char stored. */
372 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100373 }
374
Victor Stinner4e314432010-10-07 21:45:39 +0000375 if (converted == (size_t)-2) {
376 /* Incomplete character. This should never happen,
377 since we provide everything that we have -
378 unless there is a bug in the C library, or I
379 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100380 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000381 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100382
Victor Stinner4e314432010-10-07 21:45:39 +0000383 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100384 if (!surrogateescape) {
385 goto decode_error;
386 }
387
Victor Stinner4e314432010-10-07 21:45:39 +0000388 /* Conversion error. Escape as UTF-8b, and start over
389 in the initial shift state. */
390 *out++ = 0xdc00 + *in++;
391 argsize--;
392 memset(&mbs, 0, sizeof mbs);
393 continue;
394 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100395
Victor Stinner76df43d2012-10-30 01:42:39 +0100396 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100397 if (!surrogateescape) {
398 goto decode_error;
399 }
400
Victor Stinner4e314432010-10-07 21:45:39 +0000401 /* Surrogate character. Escape the original
402 byte sequence with surrogateescape. */
403 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100404 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000405 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100406 }
Victor Stinner4e314432010-10-07 21:45:39 +0000407 continue;
408 }
409 /* successfully converted some bytes */
410 in += converted;
411 argsize -= converted;
412 out++;
413 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100414 if (wlen != NULL) {
415 *wlen = out - res;
416 }
417 *wstr = res;
418 return 0;
419
420decode_error:
421 PyMem_RawFree(res);
422 if (wlen) {
423 *wlen = in - (unsigned char*)arg;
424 }
425 if (reason) {
426 *reason = "decoding error";
427 }
428 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100429#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000430 /* Cannot use C locale for escaping; manually escape as if charset
431 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
432 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100433 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinnere2623772012-11-12 23:04:02 +0100434#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100435}
436
437
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100438/* Decode a byte string from the locale encoding.
439
440 Use the strict error handler if 'surrogateescape' is zero. Use the
441 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
442 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
443 can be decoded as a surrogate character, escape the bytes using the
444 surrogateescape error handler instead of decoding them.
445
Miss Islington (bot)32955292018-04-20 14:00:41 -0700446 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100447 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
448 the number of wide characters excluding the null character into *wlen.
449
450 On memory allocation failure, return -1.
451
452 On decoding error, return -2. If wlen is not NULL, write the start of
453 invalid byte sequence in the input string into *wlen. If reason is not NULL,
454 write the decoding error message into *reason.
455
456 Use the Py_EncodeLocaleEx() function to encode the character string back to
457 a byte string. */
458int
459_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
460 const char **reason,
461 int current_locale, int surrogateescape)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100462{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100463 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100464#ifdef __ANDROID__
465 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
466 surrogateescape);
467#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100468 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100469#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100470 }
471
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100472#if defined(__APPLE__) || defined(__ANDROID__)
473 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
474 surrogateescape);
475#else
476 if (Py_UTF8Mode == 1) {
477 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
478 surrogateescape);
479 }
480
481#ifdef USE_FORCE_ASCII
482 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100483 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100484 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100485
486 if (force_ascii) {
487 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100488 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100489 }
490#endif
491
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100492 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100493#endif /* __APPLE__ or __ANDROID__ */
494}
495
496
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler: undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Return a pointer to a newly allocated wide character string, use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number
   of wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.

   Decoding errors should never happen, unless there is a bug in the C
   library.

   Use the Py_EncodeLocale() function to encode the character string back to a
   byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *wlen)
{
    wchar_t *decoded;
    const int status = _Py_DecodeLocaleEx(arg, &decoded, wlen, NULL, 0, 1);

    if (status == 0) {
        return decoded;
    }
    /* report the error code (-1 or -2) through *wlen */
    if (wlen != NULL) {
        *wlen = (size_t)status;
    }
    return NULL;
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100529
Victor Stinner91106cd2017-12-13 12:29:09 +0100530
/* Encode the wide character string 'text' to the encoding of the current
   locale, one character at a time with wcstombs().

   Characters in range U+DC80..U+DCFF are written as bytes 0x80..0xff if
   'surrogateescape' is non-zero, and are an encoding error otherwise.

   The buffer is allocated with PyMem_RawMalloc() if 'raw_malloc' is
   non-zero, with PyMem_Malloc() otherwise.

   Return 0 on success and store the null-terminated buffer into *str.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: write the index of the offending character
   into *error_pos (if not NULL) and "encoding error" into *reason (if not
   NULL). */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *buffer = NULL;   /* start of the output, NULL during the first pass */
    char *out = NULL;      /* write position, NULL during the first pass */
    size_t idx;
    size_t room = 0;       /* pass 1: bytes needed so far; pass 2: bytes left */
    wchar_t one[2];        /* single character, null-terminated */

    /* The function works in two steps:
       1. compute the length of the output buffer in bytes
       2. outputs the bytes */
    one[1] = 0;
    for (;;) {
        const int writing = (out != NULL);

        for (idx = 0; idx < nchars; idx++) {
            const wchar_t wc = text[idx];
            size_t nbytes;

            if (wc >= 0xdc80 && wc <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate: always exactly one byte */
                if (writing) {
                    *out = wc - 0xdc00;
                }
                nbytes = 1;
            }
            else {
                one[0] = wc;
                if (writing) {
                    nbytes = wcstombs(out, one, room);
                }
                else {
                    nbytes = wcstombs(NULL, one, 0);
                }
                if (nbytes == (size_t)-1) {
                    goto encode_error;
                }
            }

            if (writing) {
                out += nbytes;
                room -= nbytes;
            }
            else {
                room += nbytes;
            }
        }

        if (buffer != NULL) {
            /* second pass completed */
            *out = '\0';
            break;
        }

        room += 1;   /* nul byte at the end */
        buffer = raw_malloc ? PyMem_RawMalloc(room) : PyMem_Malloc(room);
        if (buffer == NULL) {
            return -1;
        }
        out = buffer;
    }
    *str = buffer;
    return 0;

encode_error:
    if (raw_malloc) {
        PyMem_RawFree(buffer);
    }
    else {
        PyMem_Free(buffer);
    }
    if (error_pos != NULL) {
        *error_pos = idx;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100618
619static int
620encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
621 const char **reason,
622 int raw_malloc, int current_locale, int surrogateescape)
623{
624 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100625#ifdef __ANDROID__
626 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
627 raw_malloc, surrogateescape);
628#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100629 return encode_current_locale(text, str, error_pos, reason,
630 raw_malloc, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100631#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100632 }
633
634#if defined(__APPLE__) || defined(__ANDROID__)
635 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
636 raw_malloc, surrogateescape);
637#else /* __APPLE__ */
638 if (Py_UTF8Mode == 1) {
639 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
640 raw_malloc, surrogateescape);
641 }
642
643#ifdef USE_FORCE_ASCII
644 if (force_ascii == -1) {
645 force_ascii = check_force_ascii();
646 }
647
648 if (force_ascii) {
649 return encode_ascii(text, str, error_pos, reason,
650 raw_malloc, surrogateescape);
651 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100652#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100653
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100654 return encode_current_locale(text, str, error_pos, reason,
655 raw_malloc, surrogateescape);
656#endif /* __APPLE__ or __ANDROID__ */
657}
658
/* Encode 'text' with encode_locale_ex() using the surrogateescape error
   handler and return the encoded string, or NULL on error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 unless
   encode_locale_ex() returned -2 (encoding error), in which case the value
   written by encode_locale_ex() is kept. */
static char*
encode_locale(const wchar_t *text, size_t *error_pos,
              int raw_malloc, int current_locale)
{
    char *encoded;
    const int status = encode_locale_ex(text, &encoded, error_pos, NULL,
                                        raw_malloc, current_locale, 1);

    if (error_pos != NULL && status != -2) {
        *error_pos = (size_t)-1;
    }
    return (status == 0) ? encoded : NULL;
}
674
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, and to (size_t)-1 otherwise.

   Use the Py_DecodeLocale() function to decode the bytes string back to a
   wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100692
Victor Stinner91106cd2017-12-13 12:29:09 +0100693
/* Same as Py_EncodeLocale(), except that the result is allocated with the
   raw allocator: it must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
701
702
/* Encode 'text' with encode_locale_ex(), always requesting a buffer
   allocated by the raw allocator (raw_malloc=1), and return its result.
   All other arguments are passed unchanged. */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
{
    const int raw_malloc = 1;

    return encode_locale_ex(text, str, error_pos, reason, raw_malloc,
                            current_locale, surrogateescape);
}
711
Victor Stinner6672d0c2010-10-07 22:53:43 +0000712
Steve Dowerf2f373f2015-02-21 08:44:05 -0800713#ifdef MS_WINDOWS
714static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
715
716static void
717FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
718{
719 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
720 /* Cannot simply cast and dereference in_ptr,
721 since it might not be aligned properly */
722 __int64 in;
723 memcpy(&in, in_ptr, sizeof(in));
724 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
725 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
726}
727
728void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800729_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800730{
731 /* XXX endianness */
732 __int64 out;
733 out = time_in + secs_between_epochs;
734 out = out * 10000000 + nsec_in / 100;
735 memcpy(out_ptr, &out, sizeof(out));
736}
737
738/* Below, we *know* that ugo+r is 0444 */
739#if _S_IREAD != 0400
740#error Unsupported C library
741#endif
742static int
743attributes_to_mode(DWORD attr)
744{
745 int m = 0;
746 if (attr & FILE_ATTRIBUTE_DIRECTORY)
747 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
748 else
749 m |= _S_IFREG;
750 if (attr & FILE_ATTRIBUTE_READONLY)
751 m |= 0444;
752 else
753 m |= 0666;
754 return m;
755}
756
Steve Dowerbf1f3762015-02-21 15:26:02 -0800757void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200758_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
759 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800760{
761 memset(result, 0, sizeof(*result));
762 result->st_mode = attributes_to_mode(info->dwFileAttributes);
763 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
764 result->st_dev = info->dwVolumeSerialNumber;
765 result->st_rdev = result->st_dev;
766 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
767 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
768 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
769 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100770 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800771 if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
772 /* first clear the S_IFMT bits */
773 result->st_mode ^= (result->st_mode & S_IFMT);
774 /* now set the bits that make this a symlink */
775 result->st_mode |= S_IFLNK;
776 }
777 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800778}
779#endif
780
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   For a Windows handle which is not a disk file, only st_mode is filled
   (_S_IFCHR for a character device, _S_IFIFO for a pipe, 0 otherwise).

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE handle;
    int file_type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (file_type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        /* not a disk file: status stays zeroed */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32)
                     + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800846
Victor Stinnere134a7f2015-03-30 10:09:31 +0200847/* Return information about a file.
848
849 On POSIX, use fstat().
850
851 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -0800852 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
853 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +0200854 #23152.
855
856 Raise an exception and return -1 on error. On Windows, set the last Windows
857 error on error. On POSIX, set errno on error. Fill status and return 0 on
858 success.
859
Victor Stinner6f4fae82015-04-01 18:34:32 +0200860 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
861 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +0200862int
863_Py_fstat(int fd, struct _Py_stat_struct *status)
864{
865 int res;
866
Victor Stinner8a1be612016-03-14 22:07:55 +0100867 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +0100868
Victor Stinnere134a7f2015-03-30 10:09:31 +0200869 Py_BEGIN_ALLOW_THREADS
870 res = _Py_fstat_noraise(fd, status);
871 Py_END_ALLOW_THREADS
872
873 if (res != 0) {
874#ifdef MS_WINDOWS
875 PyErr_SetFromWindowsErr(0);
876#else
877 PyErr_SetFromErrno(PyExc_OSError);
878#endif
879 return -1;
880 }
881 return 0;
882}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800883
Victor Stinner6672d0c2010-10-07 22:53:43 +0000884/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
885 call stat() otherwise. Only fill st_mode attribute on Windows.
886
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100887 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
888 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000889
890int
Victor Stinnera4a75952010-10-07 22:23:10 +0000891_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000892{
893#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000894 int err;
895 struct _stat wstatbuf;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300896 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000897
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300898 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +0100899 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100900 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300901
Victor Stinneree587ea2011-11-17 00:51:38 +0100902 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000903 if (!err)
904 statbuf->st_mode = wstatbuf.st_mode;
905 return err;
906#else
907 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300908 PyObject *bytes;
909 char *cpath;
910
911 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000912 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100913 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300914
915 /* check for embedded null bytes */
916 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
917 Py_DECREF(bytes);
918 return -2;
919 }
920
921 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000922 Py_DECREF(bytes);
923 return ret;
924#endif
925}
926
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100927
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -0800928/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +0200929static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200930get_inheritable(int fd, int raise)
931{
932#ifdef MS_WINDOWS
933 HANDLE handle;
934 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000935
Steve Dower8fc89802015-04-12 00:26:27 -0400936 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200937 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400938 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200939 if (handle == INVALID_HANDLE_VALUE) {
940 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -0700941 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200942 return -1;
943 }
944
945 if (!GetHandleInformation(handle, &flags)) {
946 if (raise)
947 PyErr_SetFromWindowsErr(0);
948 return -1;
949 }
950
951 return (flags & HANDLE_FLAG_INHERIT);
952#else
953 int flags;
954
955 flags = fcntl(fd, F_GETFD, 0);
956 if (flags == -1) {
957 if (raise)
958 PyErr_SetFromErrno(PyExc_OSError);
959 return -1;
960 }
961 return !(flags & FD_CLOEXEC);
962#endif
963}
964
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    /* raise=1: errors are reported as exceptions */
    int inheritable = get_inheritable(fd, 1);
    return inheritable;
}
973
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -0800974
975/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +0200976static int
977set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
978{
979#ifdef MS_WINDOWS
980 HANDLE handle;
981 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +0200982#else
983#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
984 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200985 int request;
986 int err;
Victor Stinner282124b2014-09-02 11:41:04 +0200987#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +0200988 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200989 int res;
990#endif
991
992 /* atomic_flag_works can only be used to make the file descriptor
993 non-inheritable */
994 assert(!(atomic_flag_works != NULL && inheritable));
995
996 if (atomic_flag_works != NULL && !inheritable) {
997 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -0700998 int isInheritable = get_inheritable(fd, raise);
999 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001000 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001001 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001002 }
1003
1004 if (*atomic_flag_works)
1005 return 0;
1006 }
1007
1008#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001009 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001010 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001011 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001012 if (handle == INVALID_HANDLE_VALUE) {
1013 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001014 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001015 return -1;
1016 }
1017
1018 if (inheritable)
1019 flags = HANDLE_FLAG_INHERIT;
1020 else
1021 flags = 0;
1022 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1023 if (raise)
1024 PyErr_SetFromWindowsErr(0);
1025 return -1;
1026 }
1027 return 0;
1028
Victor Stinnerdaf45552013-08-28 00:53:59 +02001029#else
Victor Stinner282124b2014-09-02 11:41:04 +02001030
1031#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -08001032 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001033 /* fast-path: ioctl() only requires one syscall */
Miss Islington (bot)2bb0bfa2018-02-05 22:31:22 -08001034 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1035 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001036 if (inheritable)
1037 request = FIONCLEX;
1038 else
1039 request = FIOCLEX;
1040 err = ioctl(fd, request, NULL);
1041 if (!err) {
1042 ioctl_works = 1;
1043 return 0;
1044 }
1045
Victor Stinner3116cc42016-05-19 16:46:18 +02001046 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001047 if (raise)
1048 PyErr_SetFromErrno(PyExc_OSError);
1049 return -1;
1050 }
1051 else {
1052 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1053 device". The ioctl is declared but not supported by the kernel.
1054 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001055 Illumos-based OS for example.
1056
1057 Issue #27057: When SELinux policy disallows ioctl it will fail
1058 with EACCES. While FIOCLEX is safe operation it may be
1059 unavailable because ioctl was denied altogether.
1060 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001061 ioctl_works = 0;
1062 }
1063 /* fallback to fcntl() if ioctl() does not work */
1064 }
1065#endif
1066
1067 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001068 flags = fcntl(fd, F_GETFD);
1069 if (flags < 0) {
1070 if (raise)
1071 PyErr_SetFromErrno(PyExc_OSError);
1072 return -1;
1073 }
1074
Victor Stinnera858bbd2016-04-17 16:51:52 +02001075 if (inheritable) {
1076 new_flags = flags & ~FD_CLOEXEC;
1077 }
1078 else {
1079 new_flags = flags | FD_CLOEXEC;
1080 }
1081
1082 if (new_flags == flags) {
1083 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1084 return 0;
1085 }
1086
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001087 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001088 if (res < 0) {
1089 if (raise)
1090 PyErr_SetFromErrno(PyExc_OSError);
1091 return -1;
1092 }
1093 return 0;
1094#endif
1095}
1096
/* Make the file descriptor fd non-inheritable without raising an exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    /* inheritable=0, raise=0, no atomic flag cache */
    int status = set_inheritable(fd, 0, 0, NULL);
    return status;
}
1104
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, read the inheritable flag of the file
      descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set
      *atomic_flag_works to 0 and make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=1: errors are reported as exceptions */
    int status = set_inheritable(fd, inheritable, 1, atomic_flag_works);
    return status;
}
1126
/* Variant of _Py_set_inheritable() that never raises an exception: on error,
   set errno and return -1.

   It calls set_inheritable() with raise=0, the mode in which that function
   has to stay async-signal-safe on POSIX. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    int status = set_inheritable(fd, inheritable, 0, atomic_flag_works);
    return status;
}
1135
Victor Stinnera555cfc2015-03-18 00:22:14 +01001136static int
1137_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001138{
1139 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001140 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001141#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001142 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001143#endif
1144
1145#ifdef MS_WINDOWS
1146 flags |= O_NOINHERIT;
1147#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001148 atomic_flag_works = &_Py_open_cloexec_works;
1149 flags |= O_CLOEXEC;
1150#else
1151 atomic_flag_works = NULL;
1152#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001153
Victor Stinnera555cfc2015-03-18 00:22:14 +01001154 if (gil_held) {
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001155 do {
1156 Py_BEGIN_ALLOW_THREADS
1157 fd = open(pathname, flags);
1158 Py_END_ALLOW_THREADS
1159 } while (fd < 0
1160 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1161 if (async_err)
1162 return -1;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001163 if (fd < 0) {
1164 PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1165 return -1;
1166 }
1167 }
1168 else {
1169 fd = open(pathname, flags);
1170 if (fd < 0)
1171 return -1;
1172 }
1173
1174#ifndef MS_WINDOWS
1175 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001176 close(fd);
1177 return -1;
1178 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001179#endif
1180
Victor Stinnerdaf45552013-08-28 00:53:59 +02001181 return fd;
1182}
1183
/* Wrapper to the open() function: open pathname with the specified flags.
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    int fd;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    /* gil_held=1 */
    fd = _Py_open_impl(pathname, flags, 1);
    return fd;
}
1201
/* Wrapper to the open() function which does not raise exceptions: open
   pathname with the specified flags.
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    /* gil_held=0 */
    int fd = _Py_open_impl(pathname, flags, 0);
    return fd;
}
1213
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return the stream on success, NULL on error.  On non-Windows platforms
   errno describes the failure: EINVAL if mode or path cannot be encoded,
   otherwise the value left by fopen() or by make_non_inheritable().

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
    int saved_errno;
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;

    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        /* mode cannot be encoded or does not fit (with its null byte) */
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        /* same error as _Py_wreadlink() and _Py_wrealpath(): do not leave
           a stale errno to the caller */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    /* releasing cpath must not hide the error of fopen() */
    saved_errno = errno;
    PyMem_RawFree(cpath);
    errno = saved_errno;
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: report the original failure */
        saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1250
/* Wrapper to fopen().

   Return the stream on success, NULL on error.  No exception is raised.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *f = fopen(pathname, mode);
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: keep the value left by
           make_non_inheritable() for the caller */
        int saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1268
1269/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001270 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001271
Victor Stinnere42ccd22015-03-18 01:39:23 +01001272 Return the new file object on success. Raise an exception and return NULL
1273 on error.
1274
1275 The file descriptor is created non-inheritable.
1276
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001277 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1278 except if the Python signal handler raises an exception.
1279
Victor Stinner6f4fae82015-04-01 18:34:32 +02001280 Release the GIL to call _wfopen() or fopen(). The caller must hold
1281 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001282FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001283_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001284{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001285 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001286 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001287#ifdef MS_WINDOWS
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001288 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001289 wchar_t wmode[10];
1290 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001291
Victor Stinnere42ccd22015-03-18 01:39:23 +01001292 assert(PyGILState_Check());
1293
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001294 if (!PyUnicode_Check(path)) {
1295 PyErr_Format(PyExc_TypeError,
1296 "str file path expected under Windows, got %R",
1297 Py_TYPE(path));
1298 return NULL;
1299 }
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001300 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001301 if (wpath == NULL)
1302 return NULL;
1303
Miss Islington (bot)ca82e3c2018-02-18 10:40:07 -08001304 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1305 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001306 if (usize == 0) {
1307 PyErr_SetFromWindowsErr(0);
Victor Stinner4e314432010-10-07 21:45:39 +00001308 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001309 }
Victor Stinner4e314432010-10-07 21:45:39 +00001310
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001311 do {
1312 Py_BEGIN_ALLOW_THREADS
1313 f = _wfopen(wpath, wmode);
1314 Py_END_ALLOW_THREADS
1315 } while (f == NULL
1316 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinner4e314432010-10-07 21:45:39 +00001317#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001318 PyObject *bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001319 char *path_bytes;
1320
1321 assert(PyGILState_Check());
1322
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001323 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001324 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001325 path_bytes = PyBytes_AS_STRING(bytes);
1326
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001327 do {
1328 Py_BEGIN_ALLOW_THREADS
1329 f = fopen(path_bytes, mode);
1330 Py_END_ALLOW_THREADS
1331 } while (f == NULL
1332 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001333
Victor Stinner4e314432010-10-07 21:45:39 +00001334 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001335#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001336 if (async_err)
1337 return NULL;
1338
Victor Stinnere42ccd22015-03-18 01:39:23 +01001339 if (f == NULL) {
1340 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001341 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001342 }
1343
1344 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001345 fclose(f);
1346 return NULL;
1347 }
1348 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001349}
1350
Victor Stinner66aab0c2015-03-19 22:53:20 +01001351/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001352
1353 On success, return the number of read bytes, it can be lower than count.
1354 If the current file offset is at or past the end of file, no bytes are read,
1355 and read() returns zero.
1356
1357 On error, raise an exception, set errno and return -1.
1358
1359 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1360 If the Python signal handler raises an exception, the function returns -1
1361 (the syscall is not retried).
1362
1363 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001364Py_ssize_t
1365_Py_read(int fd, void *buf, size_t count)
1366{
1367 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001368 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001369 int async_err = 0;
1370
Victor Stinner8a1be612016-03-14 22:07:55 +01001371 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001372
Victor Stinner66aab0c2015-03-19 22:53:20 +01001373 /* _Py_read() must not be called with an exception set, otherwise the
1374 * caller may think that read() was interrupted by a signal and the signal
1375 * handler raised an exception. */
1376 assert(!PyErr_Occurred());
1377
Miss Islington (bot)178d1c02018-10-17 23:58:40 -07001378 if (count > _PY_READ_MAX) {
1379 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001380 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001381
Steve Dower8fc89802015-04-12 00:26:27 -04001382 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001383 do {
1384 Py_BEGIN_ALLOW_THREADS
1385 errno = 0;
1386#ifdef MS_WINDOWS
1387 n = read(fd, buf, (int)count);
1388#else
1389 n = read(fd, buf, count);
1390#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001391 /* save/restore errno because PyErr_CheckSignals()
1392 * and PyErr_SetFromErrno() can modify it */
1393 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001394 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001395 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001396 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001397 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001398
1399 if (async_err) {
1400 /* read() was interrupted by a signal (failed with EINTR)
1401 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001402 errno = err;
1403 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001404 return -1;
1405 }
1406 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001407 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001408 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001409 return -1;
1410 }
1411
1412 return n;
1413}
1414
Victor Stinner82c3e452015-04-01 18:34:45 +02001415static Py_ssize_t
1416_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001417{
1418 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001419 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001420 int async_err = 0;
1421
Steve Dower8fc89802015-04-12 00:26:27 -04001422 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001423#ifdef MS_WINDOWS
1424 if (count > 32767 && isatty(fd)) {
1425 /* Issue #11395: the Windows console returns an error (12: not
1426 enough space error) on writing into stdout if stdout mode is
1427 binary and the length is greater than 66,000 bytes (or less,
1428 depending on heap usage). */
1429 count = 32767;
1430 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001431#endif
Miss Islington (bot)178d1c02018-10-17 23:58:40 -07001432 if (count > _PY_WRITE_MAX) {
1433 count = _PY_WRITE_MAX;
1434 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001435
Victor Stinner82c3e452015-04-01 18:34:45 +02001436 if (gil_held) {
1437 do {
1438 Py_BEGIN_ALLOW_THREADS
1439 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001440#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001441 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001442#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001443 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001444#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001445 /* save/restore errno because PyErr_CheckSignals()
1446 * and PyErr_SetFromErrno() can modify it */
1447 err = errno;
1448 Py_END_ALLOW_THREADS
1449 } while (n < 0 && err == EINTR &&
1450 !(async_err = PyErr_CheckSignals()));
1451 }
1452 else {
1453 do {
1454 errno = 0;
1455#ifdef MS_WINDOWS
1456 n = write(fd, buf, (int)count);
1457#else
1458 n = write(fd, buf, count);
1459#endif
1460 err = errno;
1461 } while (n < 0 && err == EINTR);
1462 }
Steve Dower8fc89802015-04-12 00:26:27 -04001463 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001464
1465 if (async_err) {
1466 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001467 and the Python signal handler raised an exception (if gil_held is
1468 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001469 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001470 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001471 return -1;
1472 }
1473 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001474 if (gil_held)
1475 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001476 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001477 return -1;
1478 }
1479
1480 return n;
1481}
1482
Victor Stinner82c3e452015-04-01 18:34:45 +02001483/* Write count bytes of buf into fd.
1484
1485 On success, return the number of written bytes, it can be lower than count
1486 including 0. On error, raise an exception, set errno and return -1.
1487
1488 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1489 If the Python signal handler raises an exception, the function returns -1
1490 (the syscall is not retried).
1491
1492 Release the GIL to call write(). The caller must hold the GIL. */
1493Py_ssize_t
1494_Py_write(int fd, const void *buf, size_t count)
1495{
Victor Stinner8a1be612016-03-14 22:07:55 +01001496 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001497
Victor Stinner82c3e452015-04-01 18:34:45 +02001498 /* _Py_write() must not be called with an exception set, otherwise the
1499 * caller may think that write() was interrupted by a signal and the signal
1500 * handler raised an exception. */
1501 assert(!PyErr_Occurred());
1502
1503 return _Py_write_impl(fd, buf, count, 1);
1504}
1505
1506/* Write count bytes of buf into fd.
1507 *
1508 * On success, return the number of written bytes, it can be lower than count
1509 * including 0. On error, set errno and return -1.
1510 *
1511 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1512 * without calling the Python signal handler. */
1513Py_ssize_t
1514_Py_write_noraise(int fd, const void *buf, size_t count)
1515{
1516 return _Py_write_impl(fd, buf, count, 0);
1517}
1518
#ifdef HAVE_READLINK

/* Read the value of the symbolic link path into buf.  The path is encoded to
   the locale encoding for readlink() and the result is decoded from the
   locale encoding.

   bufsiz is the capacity of buf, counted in wide characters.

   Return the length reported by Py_DecodeLocale() for the decoded value on
   success.  Return -1 on error; errno is set to EINVAL when the path cannot
   be encoded, the link value cannot be decoded or a buffer is too small. */

int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char target[MAXPATHLEN];
    char *encoded_path;
    wchar_t *decoded;
    size_t decoded_len;
    int nbytes;

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    nbytes = (int)readlink(encoded_path, target, Py_ARRAY_LENGTH(target));
    PyMem_RawFree(encoded_path);
    if (nbytes == -1) {
        return -1;
    }
    if (nbytes == Py_ARRAY_LENGTH(target)) {
        /* target is full: no room left for the null byte added below */
        errno = EINVAL;
        return -1;
    }
    target[nbytes] = '\0'; /* buf will be null terminated */

    decoded = Py_DecodeLocale(target, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    if (bufsiz <= decoded_len) {
        /* buf cannot hold the decoded value */
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return -1;
    }

    wcsncpy(buf, decoded, bufsiz);
    PyMem_RawFree(decoded);
    return (int)decoded_len;
}
#endif
1562
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname of path.  The path is encoded
   to the locale encoding for realpath() and the result is decoded from the
   locale encoding into resolved_path.

   resolved_path_size is the capacity of resolved_path, counted in wide
   characters.

   Return resolved_path on success.  Return NULL on error; errno is set to
   EINVAL when the path cannot be encoded, the result cannot be decoded or
   resolved_path is too small. */

wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_size)
{
    char resolved_bytes[MAXPATHLEN];
    char *encoded_path;
    wchar_t *decoded;
    size_t decoded_len;
    char *found;

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return NULL;
    }

    found = realpath(encoded_path, resolved_bytes);
    PyMem_RawFree(encoded_path);
    if (found == NULL) {
        return NULL;
    }

    decoded = Py_DecodeLocale(resolved_bytes, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }
    if (resolved_path_size <= decoded_len) {
        /* resolved_path cannot hold the decoded value */
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return NULL;
    }

    wcsncpy(resolved_path, decoded, resolved_path_size);
    PyMem_RawFree(decoded);
    return resolved_path;
}
#endif
1603
Victor Stinnerf4061da2010-10-14 12:37:19 +00001604/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001605 including the null character. Decode the path from the locale encoding.
1606 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001607
Victor Stinner4e314432010-10-07 21:45:39 +00001608wchar_t*
1609_Py_wgetcwd(wchar_t *buf, size_t size)
1610{
1611#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +02001612 int isize = (int)Py_MIN(size, INT_MAX);
1613 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +00001614#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001615 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001616 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001617 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001618
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001619 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001620 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001621 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001622 if (wname == NULL)
1623 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +00001624 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001625 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001626 return NULL;
1627 }
Victor Stinnerf4061da2010-10-14 12:37:19 +00001628 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001629 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001630 return buf;
1631#endif
1632}
1633
Victor Stinnerdaf45552013-08-28 00:53:59 +02001634/* Duplicate a file descriptor. The new file descriptor is created as
1635 non-inheritable. Return a new file descriptor on success, raise an OSError
1636 exception and return -1 on error.
1637
1638 The GIL is released to call dup(). The caller must hold the GIL. */
1639int
1640_Py_dup(int fd)
1641{
1642#ifdef MS_WINDOWS
1643 HANDLE handle;
1644 DWORD ftype;
1645#endif
1646
Victor Stinner8a1be612016-03-14 22:07:55 +01001647 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001648
Victor Stinnerdaf45552013-08-28 00:53:59 +02001649#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001650 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001651 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001652 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001653 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001654 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001655 return -1;
1656 }
1657
1658 /* get the file type, ignore the error if it failed */
1659 ftype = GetFileType(handle);
1660
1661 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001662 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001663 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001664 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001665 Py_END_ALLOW_THREADS
1666 if (fd < 0) {
1667 PyErr_SetFromErrno(PyExc_OSError);
1668 return -1;
1669 }
1670
1671 /* Character files like console cannot be make non-inheritable */
1672 if (ftype != FILE_TYPE_CHAR) {
1673 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001674 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001675 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001676 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001677 return -1;
1678 }
1679 }
1680#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1681 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001682 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001683 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001684 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001685 Py_END_ALLOW_THREADS
1686 if (fd < 0) {
1687 PyErr_SetFromErrno(PyExc_OSError);
1688 return -1;
1689 }
1690
1691#else
1692 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001693 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001694 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001695 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001696 Py_END_ALLOW_THREADS
1697 if (fd < 0) {
1698 PyErr_SetFromErrno(PyExc_OSError);
1699 return -1;
1700 }
1701
1702 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001703 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001704 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001705 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001706 return -1;
1707 }
1708#endif
1709 return fd;
1710}
1711
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001712#ifndef MS_WINDOWS
1713/* Get the blocking mode of the file descriptor.
1714 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1715 raise an exception and return -1 on error. */
1716int
1717_Py_get_blocking(int fd)
1718{
Steve Dower8fc89802015-04-12 00:26:27 -04001719 int flags;
1720 _Py_BEGIN_SUPPRESS_IPH
1721 flags = fcntl(fd, F_GETFL, 0);
1722 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001723 if (flags < 0) {
1724 PyErr_SetFromErrno(PyExc_OSError);
1725 return -1;
1726 }
1727
1728 return !(flags & O_NONBLOCK);
1729}
1730
1731/* Set the blocking mode of the specified file descriptor.
1732
1733 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1734 otherwise.
1735
1736 Return 0 on success, raise an exception and return -1 on error. */
1737int
1738_Py_set_blocking(int fd, int blocking)
1739{
1740#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1741 int arg = !blocking;
1742 if (ioctl(fd, FIONBIO, &arg) < 0)
1743 goto error;
1744#else
1745 int flags, res;
1746
Steve Dower8fc89802015-04-12 00:26:27 -04001747 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001748 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001749 if (flags >= 0) {
1750 if (blocking)
1751 flags = flags & (~O_NONBLOCK);
1752 else
1753 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001754
Steve Dower8fc89802015-04-12 00:26:27 -04001755 res = fcntl(fd, F_SETFL, flags);
1756 } else {
1757 res = -1;
1758 }
1759 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001760
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001761 if (res < 0)
1762 goto error;
1763#endif
1764 return 0;
1765
1766error:
1767 PyErr_SetFromErrno(PyExc_OSError);
1768 return -1;
1769}
1770#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01001771
1772
1773int
1774_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
1775 const char **grouping)
1776{
1777 int res = -1;
1778
1779 struct lconv *lc = localeconv();
1780
1781 int change_locale = 0;
1782 if (decimal_point != NULL &&
1783 (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
1784 {
1785 change_locale = 1;
1786 }
1787 if (thousands_sep != NULL &&
1788 (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
1789 {
1790 change_locale = 1;
1791 }
1792
1793 /* Keep a copy of the LC_CTYPE locale */
1794 char *oldloc = NULL, *loc = NULL;
1795 if (change_locale) {
1796 oldloc = setlocale(LC_CTYPE, NULL);
1797 if (!oldloc) {
1798 PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale");
1799 return -1;
1800 }
1801
1802 oldloc = _PyMem_Strdup(oldloc);
1803 if (!oldloc) {
1804 PyErr_NoMemory();
1805 return -1;
1806 }
1807
1808 loc = setlocale(LC_NUMERIC, NULL);
1809 if (loc != NULL && strcmp(loc, oldloc) == 0) {
1810 loc = NULL;
1811 }
1812
1813 if (loc != NULL) {
1814 /* Only set the locale temporarilty the LC_CTYPE locale
1815 if LC_NUMERIC locale is different than LC_CTYPE locale and
1816 decimal_point and/or thousands_sep are non-ASCII or longer than
1817 1 byte */
1818 setlocale(LC_CTYPE, loc);
1819 }
1820 }
1821
1822 if (decimal_point != NULL) {
1823 *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1824 if (*decimal_point == NULL) {
1825 goto error;
1826 }
1827 }
1828 if (thousands_sep != NULL) {
1829 *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1830 if (*thousands_sep == NULL) {
1831 goto error;
1832 }
1833 }
1834
1835 if (grouping != NULL) {
1836 *grouping = lc->grouping;
1837 }
1838
1839 res = 0;
1840
1841error:
1842 if (loc != NULL) {
1843 setlocale(LC_CTYPE, oldloc);
1844 }
1845 PyMem_Free(oldloc);
1846 return res;
1847}