blob: 2a079bbadcc5f681496ac3646a382e05bfafc2c5 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
21#ifdef HAVE_FCNTL_H
22#include <fcntl.h>
23#endif /* HAVE_FCNTL_H */
24
#ifdef O_CLOEXEC
/* Tri-state cache recording whether open() honours the O_CLOEXEC flag:

     -1: unknown
      0: open() ignores the O_CLOEXEC flag (for example, Linux kernels older
         than 2.6.23)
      1: open() supports the O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO and
   os.open(). */
int _Py_open_cloexec_works = -1;
#endif
36
// Upper bound of the accepted code point range [U+0000; U+10ffff].
// The value must be the same in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// Error values returned by mbstowcs() and mbrtowc().
static const size_t DECODE_ERROR = (size_t)-1;
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
43
Victor Stinner3d4226a2018-08-29 22:21:32 +020044
45static int
46get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
47{
48 switch (errors)
49 {
50 case _Py_ERROR_STRICT:
51 *surrogateescape = 0;
52 return 0;
53 case _Py_ERROR_SURROGATEESCAPE:
54 *surrogateescape = 1;
55 return 0;
56 default:
57 return -1;
58 }
59}
60
61
Brett Cannonefb00c02012-02-29 18:31:31 -050062PyObject *
63_Py_device_encoding(int fd)
64{
Steve Dower8fc89802015-04-12 00:26:27 -040065 int valid;
66 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070067 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040068 _Py_END_SUPPRESS_IPH
69 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050070 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040071
Victor Stinner14b9b112013-06-25 00:37:25 +020072#if defined(MS_WINDOWS)
Victor Stinner35297182020-11-04 11:20:10 +010073 UINT cp;
Brett Cannonefb00c02012-02-29 18:31:31 -050074 if (fd == 0)
75 cp = GetConsoleCP();
76 else if (fd == 1 || fd == 2)
77 cp = GetConsoleOutputCP();
78 else
79 cp = 0;
80 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
81 has no console */
Victor Stinner35297182020-11-04 11:20:10 +010082 if (cp == 0) {
83 Py_RETURN_NONE;
Brett Cannonefb00c02012-02-29 18:31:31 -050084 }
Victor Stinner35297182020-11-04 11:20:10 +010085
86 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
87#else
88 return _Py_GetLocaleEncodingObject();
Brett Cannonefb00c02012-02-29 18:31:31 -050089#endif
Brett Cannonefb00c02012-02-29 18:31:31 -050090}
91
Victor Stinner99768342021-03-17 21:46:53 +010092
93static size_t
94is_valid_wide_char(wchar_t ch)
95{
96 if (Py_UNICODE_IS_SURROGATE(ch)) {
97 // Reject lone surrogate characters
98 return 0;
99 }
100 if (ch > MAX_UNICODE) {
101 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
102 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
103 // it creates characters outside the [U+0000; U+10ffff] range:
104 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
105 return 0;
106 }
107 return 1;
108}
109
110
111static size_t
112_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
113{
114 size_t count = mbstowcs(dest, src, n);
115 if (dest != NULL && count != DECODE_ERROR) {
116 for (size_t i=0; i < count; i++) {
117 wchar_t ch = dest[i];
118 if (!is_valid_wide_char(ch)) {
119 return DECODE_ERROR;
120 }
121 }
122 }
123 return count;
124}
125
126
#ifdef HAVE_MBRTOWC
/* mbrtowc() wrapper which also validates the decoded character.

   Call mbrtowc(pwc, str, len, pmbs); 'pwc' must not be NULL. When the call
   returns a value other than 0, DECODE_ERROR or INCOMPLETE_CHARACTER,
   check *pwc with is_valid_wide_char() and return DECODE_ERROR if it is
   rejected. Otherwise, return the mbrtowc() result unchanged. */
static size_t
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
{
    assert(pwc != NULL);
    const size_t nbytes = mbrtowc(pwc, str, len, pmbs);
    if (nbytes == 0
        || nbytes == DECODE_ERROR
        || nbytes == INCOMPLETE_CHARACTER)
    {
        /* end of string or mbrtowc() error: passed through as is */
        return nbytes;
    }
    return is_valid_wide_char(*pwc) ? nbytes : DECODE_ERROR;
}
#endif
141
142
Victor Stinnere2510952019-05-02 11:28:57 -0400143#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100144
145#define USE_FORCE_ASCII
146
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100147extern int _Py_normalize_encoding(const char *, char *, size_t);
148
/* Workaround for a locale encoding issue seen on FreeBSD and OpenIndiana with
   the C locale and the POSIX locale: nl_langinfo(CODESET) announces an alias
   of the ASCII encoding, whereas the mbstowcs() and wcstombs() functions use
   the ISO-8859-1 encoding. The problem is that os.fsencode() and
   os.fsdecode() use the locale.getpreferredencoding() codec. For example, if
   command line arguments are decoded by mbstowcs() and encoded back by
   os.fsencode(), we get a UnicodeEncodeError instead of retrieving the
   original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
   at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. The ASCII
   encoding is forced in this case as well.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
177
/* Compute the value of force_ascii: return 1 if the ASCII encoding must be
   forced, 0 otherwise.

   Return 0 if the LC_CTYPE locale is neither "C" nor "POSIX". Return 1 on
   any error (the locale or the codeset cannot be retrieved, the codeset name
   cannot be normalized) and when nl_langinfo(CODESET) is not available. */
static int
check_force_ascii(void)
{
    const char *ctype_locale = setlocale(LC_CTYPE, NULL);
    if (ctype_locale == NULL) {
        /* if an error occurred, force the ASCII encoding */
        return 1;
    }
    if (strcmp(ctype_locale, "C") != 0
        && strcmp(ctype_locale, "POSIX") != 0)
    {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL || codeset[0] == '\0') {
        /* CODESET is not set or empty: error, force the ASCII encoding */
        return 1;
    }

    char normalized[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, normalized, sizeof(normalized))) {
        /* error, force the ASCII encoding */
        return 1;
    }

#ifdef __hpux
    if (strcmp(normalized, "roman8") == 0) {
        /* On HP-UX with the C locale or the POSIX locale,
           nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
           Latin1 encoding in practice. Force ASCII in this case.

           Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
        unsigned char probe = (unsigned char)0xA7;
        wchar_t decoded;
        size_t res = _Py_mbstowcs(&decoded, (char*)&probe, 1);
        if (res != DECODE_ERROR && decoded == L'\xA7') {
            return 1;
        }
    }
#else
    const char* ascii_names[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    /* Stop on the matching name, or on the NULL sentinel if none matches */
    const char **name = ascii_names;
    while (*name != NULL && strcmp(normalized, *name) != 0) {
        name++;
    }
    if (*name == NULL) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int byte = 0x80; byte <= 0xff; byte++) {
        char src[1];
        wchar_t dst[1];

        src[0] = (char)(unsigned char)byte;
        if (_Py_mbstowcs(dst, src, 1) != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */
}
277
Victor Stinnerd500e532018-08-28 17:27:36 +0200278
279int
280_Py_GetForceASCII(void)
281{
282 if (force_ascii == -1) {
283 force_ascii = check_force_ascii();
284 }
285 return force_ascii;
286}
287
288
Victor Stinner353933e2018-11-23 13:08:26 +0100289void
290_Py_ResetForceASCII(void)
291{
292 force_ascii = -1;
293}
294
295
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100296static int
297encode_ascii(const wchar_t *text, char **str,
298 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200299 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100300{
301 char *result = NULL, *out;
302 size_t len, i;
303 wchar_t ch;
304
Victor Stinner3d4226a2018-08-29 22:21:32 +0200305 int surrogateescape;
306 if (get_surrogateescape(errors, &surrogateescape) < 0) {
307 return -3;
308 }
309
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100310 len = wcslen(text);
311
Victor Stinner9bee3292017-12-21 16:49:13 +0100312 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100313 if (raw_malloc) {
314 result = PyMem_RawMalloc(len + 1);
315 }
316 else {
317 result = PyMem_Malloc(len + 1);
318 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100319 if (result == NULL) {
320 return -1;
321 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100322
323 out = result;
324 for (i=0; i<len; i++) {
325 ch = text[i];
326
327 if (ch <= 0x7f) {
328 /* ASCII character */
329 *out++ = (char)ch;
330 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100331 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100332 /* UTF-8b surrogate */
333 *out++ = (char)(ch - 0xdc00);
334 }
335 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100336 if (raw_malloc) {
337 PyMem_RawFree(result);
338 }
339 else {
340 PyMem_Free(result);
341 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100342 if (error_pos != NULL) {
343 *error_pos = i;
344 }
345 if (reason) {
346 *reason = "encoding error";
347 }
348 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100349 }
350 }
351 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100352 *str = result;
353 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100354}
Victor Stinnerd500e532018-08-28 17:27:36 +0200355#else
/* Variant used when the force-ASCII workaround is compiled out
   (_Py_FORCE_UTF8_FS_ENCODING is defined or on Windows): the ASCII encoding
   is never forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100361
/* Variant used when the force-ASCII workaround is compiled out
   (_Py_FORCE_UTF8_FS_ENCODING is defined or on Windows): there is no cached
   value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
Victor Stinnere2510952019-05-02 11:28:57 -0400367#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100368
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100369
370#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
371static int
372decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200373 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100374{
375 wchar_t *res;
376 unsigned char *in;
377 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600378 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100379
Victor Stinner3d4226a2018-08-29 22:21:32 +0200380 int surrogateescape;
381 if (get_surrogateescape(errors, &surrogateescape) < 0) {
382 return -3;
383 }
384
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100385 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
386 return -1;
387 }
388 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
389 if (!res) {
390 return -1;
391 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100392
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100393 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100394 for (in = (unsigned char*)arg; *in; in++) {
395 unsigned char ch = *in;
396 if (ch < 128) {
397 *out++ = ch;
398 }
399 else {
400 if (!surrogateescape) {
401 PyMem_RawFree(res);
402 if (wlen) {
403 *wlen = in - (unsigned char*)arg;
404 }
405 if (reason) {
406 *reason = "decoding error";
407 }
408 return -2;
409 }
410 *out++ = 0xdc00 + ch;
411 }
412 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100413 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100414
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100415 if (wlen != NULL) {
416 *wlen = out - res;
417 }
418 *wstr = res;
419 return 0;
420}
421#endif /* !HAVE_MBRTOWC */
422
423static int
424decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200425 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000426{
427 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100428 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000429 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200430#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000431 unsigned char *in;
432 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000433 mbstate_t mbs;
434#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100435
Victor Stinner3d4226a2018-08-29 22:21:32 +0200436 int surrogateescape;
437 if (get_surrogateescape(errors, &surrogateescape) < 0) {
438 return -3;
439 }
440
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100441#ifdef HAVE_BROKEN_MBSTOWCS
442 /* Some platforms have a broken implementation of
443 * mbstowcs which does not count the characters that
444 * would result from conversion. Use an upper bound.
445 */
446 argsize = strlen(arg);
447#else
Victor Stinner99768342021-03-17 21:46:53 +0100448 argsize = _Py_mbstowcs(NULL, arg, 0);
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100449#endif
Victor Stinner99768342021-03-17 21:46:53 +0100450 if (argsize != DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100451 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
452 return -1;
453 }
454 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
455 if (!res) {
456 return -1;
457 }
458
Victor Stinner99768342021-03-17 21:46:53 +0100459 count = _Py_mbstowcs(res, arg, argsize + 1);
460 if (count != DECODE_ERROR) {
461 *wstr = res;
462 if (wlen != NULL) {
463 *wlen = count;
Victor Stinner168e1172010-10-16 23:16:16 +0000464 }
Victor Stinner99768342021-03-17 21:46:53 +0100465 return 0;
Victor Stinner4e314432010-10-07 21:45:39 +0000466 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200467 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000468 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100469
Victor Stinner4e314432010-10-07 21:45:39 +0000470 /* Conversion failed. Fall back to escaping with surrogateescape. */
471#ifdef HAVE_MBRTOWC
472 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
473
474 /* Overallocate; as multi-byte characters are in the argument, the
475 actual output could use less memory. */
476 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100477 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
478 return -1;
479 }
480 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
481 if (!res) {
482 return -1;
483 }
484
Victor Stinner4e314432010-10-07 21:45:39 +0000485 in = (unsigned char*)arg;
486 out = res;
487 memset(&mbs, 0, sizeof mbs);
488 while (argsize) {
Victor Stinner99768342021-03-17 21:46:53 +0100489 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100490 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000491 /* Reached end of string; null char stored. */
492 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100493 }
494
Victor Stinner99768342021-03-17 21:46:53 +0100495 if (converted == INCOMPLETE_CHARACTER) {
Victor Stinner4e314432010-10-07 21:45:39 +0000496 /* Incomplete character. This should never happen,
497 since we provide everything that we have -
498 unless there is a bug in the C library, or I
499 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100500 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000501 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100502
Victor Stinner99768342021-03-17 21:46:53 +0100503 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100504 if (!surrogateescape) {
505 goto decode_error;
506 }
507
Victor Stinner99768342021-03-17 21:46:53 +0100508 /* Decoding error. Escape as UTF-8b, and start over in the initial
509 shift state. */
Victor Stinner4e314432010-10-07 21:45:39 +0000510 *out++ = 0xdc00 + *in++;
511 argsize--;
512 memset(&mbs, 0, sizeof mbs);
513 continue;
514 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100515
Victor Stinner99768342021-03-17 21:46:53 +0100516 // _Py_mbrtowc() reject lone surrogate characters
517 assert(!Py_UNICODE_IS_SURROGATE(*out));
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100518
Victor Stinner4e314432010-10-07 21:45:39 +0000519 /* successfully converted some bytes */
520 in += converted;
521 argsize -= converted;
522 out++;
523 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100524 if (wlen != NULL) {
525 *wlen = out - res;
526 }
527 *wstr = res;
528 return 0;
529
530decode_error:
531 PyMem_RawFree(res);
532 if (wlen) {
533 *wlen = in - (unsigned char*)arg;
534 }
535 if (reason) {
536 *reason = "decoding error";
537 }
538 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100539#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000540 /* Cannot use C locale for escaping; manually escape as if charset
541 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
542 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200543 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100544#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100545}
546
547
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100548/* Decode a byte string from the locale encoding.
549
550 Use the strict error handler if 'surrogateescape' is zero. Use the
551 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
552 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
553 can be decoded as a surrogate character, escape the bytes using the
554 surrogateescape error handler instead of decoding them.
555
Ville Skyttä61f82e02018-04-20 23:08:45 +0300556 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100557 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
558 the number of wide characters excluding the null character into *wlen.
559
560 On memory allocation failure, return -1.
561
562 On decoding error, return -2. If wlen is not NULL, write the start of
563 invalid byte sequence in the input string into *wlen. If reason is not NULL,
564 write the decoding error message into *reason.
565
Victor Stinner3d4226a2018-08-29 22:21:32 +0200566 Return -3 if the error handler 'errors' is not supported.
567
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100568 Use the Py_EncodeLocaleEx() function to encode the character string back to
569 a byte string. */
570int
571_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
572 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200573 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100574{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100575 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400576#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100577 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200578 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100579#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200580 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100581#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100582 }
583
Victor Stinnere2510952019-05-02 11:28:57 -0400584#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100585 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200586 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100587#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200588 int use_utf8 = (Py_UTF8Mode == 1);
589#ifdef MS_WINDOWS
590 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
591#endif
592 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200593 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
594 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100595 }
596
597#ifdef USE_FORCE_ASCII
598 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100599 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100600 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100601
602 if (force_ascii) {
603 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200604 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100605 }
606#endif
607
Victor Stinner3d4226a2018-08-29 22:21:32 +0200608 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400609#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100610}
611
612
Victor Stinner91106cd2017-12-13 12:29:09 +0100613/* Decode a byte string from the locale encoding with the
614 surrogateescape error handler: undecodable bytes are decoded as characters
615 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
616 character, escape the bytes using the surrogateescape error handler instead
617 of decoding them.
618
619 Return a pointer to a newly allocated wide character string, use
620 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
621 wide characters excluding the null character into *size
622
623 Return NULL on decoding error or memory allocation error. If *size* is not
624 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
625 decoding error.
626
627 Decoding errors should never happen, unless there is a bug in the C
628 library.
629
630 Use the Py_EncodeLocale() function to encode the character string back to a
631 byte string. */
632wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100633Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100634{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100635 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200636 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
637 NULL, 0,
638 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100639 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200640 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100641 if (wlen != NULL) {
642 *wlen = (size_t)res;
643 }
644 return NULL;
645 }
646 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100647}
Victor Stinner91106cd2017-12-13 12:29:09 +0100648
Victor Stinner91106cd2017-12-13 12:29:09 +0100649
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100650static int
651encode_current_locale(const wchar_t *text, char **str,
652 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200653 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100654{
Victor Stinner4e314432010-10-07 21:45:39 +0000655 const size_t len = wcslen(text);
656 char *result = NULL, *bytes = NULL;
657 size_t i, size, converted;
658 wchar_t c, buf[2];
659
Victor Stinner3d4226a2018-08-29 22:21:32 +0200660 int surrogateescape;
661 if (get_surrogateescape(errors, &surrogateescape) < 0) {
662 return -3;
663 }
664
Victor Stinner4e314432010-10-07 21:45:39 +0000665 /* The function works in two steps:
666 1. compute the length of the output buffer in bytes (size)
667 2. outputs the bytes */
668 size = 0;
669 buf[1] = 0;
670 while (1) {
671 for (i=0; i < len; i++) {
672 c = text[i];
673 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100674 if (!surrogateescape) {
675 goto encode_error;
676 }
Victor Stinner4e314432010-10-07 21:45:39 +0000677 /* UTF-8b surrogate */
678 if (bytes != NULL) {
679 *bytes++ = c - 0xdc00;
680 size--;
681 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100682 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000683 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100684 }
Victor Stinner4e314432010-10-07 21:45:39 +0000685 continue;
686 }
687 else {
688 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100689 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000690 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100691 }
692 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000693 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100694 }
Victor Stinner99768342021-03-17 21:46:53 +0100695 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100696 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000697 }
698 if (bytes != NULL) {
699 bytes += converted;
700 size -= converted;
701 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100702 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000703 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100704 }
Victor Stinner4e314432010-10-07 21:45:39 +0000705 }
706 }
707 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100708 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000709 break;
710 }
711
712 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100713 if (raw_malloc) {
714 result = PyMem_RawMalloc(size);
715 }
716 else {
717 result = PyMem_Malloc(size);
718 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100719 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100720 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100721 }
Victor Stinner4e314432010-10-07 21:45:39 +0000722 bytes = result;
723 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100724 *str = result;
725 return 0;
726
727encode_error:
728 if (raw_malloc) {
729 PyMem_RawFree(result);
730 }
731 else {
732 PyMem_Free(result);
733 }
734 if (error_pos != NULL) {
735 *error_pos = i;
736 }
737 if (reason) {
738 *reason = "encoding error";
739 }
740 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100741}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100742
Victor Stinner3d4226a2018-08-29 22:21:32 +0200743
744/* Encode a string to the locale encoding.
745
746 Parameters:
747
748 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
749 of PyMem_Malloc().
750 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
751 Python filesystem encoding.
752 * errors: error handler like "strict" or "surrogateescape".
753
754 Return value:
755
756 0: success, *str is set to a newly allocated decoded string.
757 -1: memory allocation failure
758 -2: encoding error, set *error_pos and *reason (if set).
759 -3: the error handler 'errors' is not supported.
760 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100761static int
762encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
763 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200764 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100765{
766 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400767#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100768 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200769 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100770#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100771 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200772 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100773#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100774 }
775
Victor Stinnere2510952019-05-02 11:28:57 -0400776#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100777 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200778 raw_malloc, errors);
779#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200780 int use_utf8 = (Py_UTF8Mode == 1);
781#ifdef MS_WINDOWS
782 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
783#endif
784 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100785 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200786 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100787 }
788
789#ifdef USE_FORCE_ASCII
790 if (force_ascii == -1) {
791 force_ascii = check_force_ascii();
792 }
793
794 if (force_ascii) {
795 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200796 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100797 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100798#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100799
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100800 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200801 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400802#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100803}
804
Victor Stinner9dd76202017-12-21 16:20:32 +0100805static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100806encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100807 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100808{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100809 char *str;
810 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200811 raw_malloc, current_locale,
812 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100813 if (res != -2 && error_pos) {
814 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100815 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100816 if (res != 0) {
817 return NULL;
818 }
819 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100820}
821
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string; release it with
   PyMem_Free(). Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or
   to the index of the invalid character on encoding error.

   Py_DecodeLocale() decodes the byte string back to a wide character
   string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100839
Victor Stinner91106cd2017-12-13 12:29:09 +0100840
/* Same as Py_EncodeLocale(), except that the result must be released with
   PyMem_RawFree() instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
848
849
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100850int
851_Py_EncodeLocaleEx(const wchar_t *text, char **str,
852 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200853 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100854{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100855 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200856 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000857}
858
Victor Stinner6672d0c2010-10-07 22:53:43 +0000859
Victor Stinner82458b62020-11-01 20:59:35 +0100860// Get the current locale encoding name:
861//
862// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
863// - Return "UTF-8" if the UTF-8 Mode is enabled
864// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100865// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100866// - Otherwise, return nl_langinfo(CODESET).
867//
Victor Stinnere662c392020-11-01 23:07:23 +0100868// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100869//
Victor Stinner710e8262020-10-31 01:02:09 +0100870// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100871wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100872_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100873{
874#ifdef _Py_FORCE_UTF8_LOCALE
875 // On Android langinfo.h and CODESET are missing,
876 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100877 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100878#else
879 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
880 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100881 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100882 }
883
Victor Stinner82458b62020-11-01 20:59:35 +0100884#ifdef MS_WINDOWS
885 wchar_t encoding[23];
886 unsigned int ansi_codepage = GetACP();
887 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
888 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
889 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100890#else
891 const char *encoding = nl_langinfo(CODESET);
892 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100893 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
894 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100895 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100896 }
Victor Stinner710e8262020-10-31 01:02:09 +0100897
Victor Stinner82458b62020-11-01 20:59:35 +0100898 wchar_t *wstr;
899 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100900 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100901 if (res < 0) {
902 return NULL;
903 }
904 return wstr;
905#endif // !MS_WINDOWS
906
907#endif // !_Py_FORCE_UTF8_LOCALE
908}
909
910
911PyObject *
912_Py_GetLocaleEncodingObject(void)
913{
Victor Stinnere662c392020-11-01 23:07:23 +0100914 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100915 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100916 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100917 return NULL;
918 }
919
920 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
921 PyMem_RawFree(encoding);
922 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100923}
924
925
Steve Dowerf2f373f2015-02-21 08:44:05 -0800926#ifdef MS_WINDOWS
927static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
928
929static void
930FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
931{
932 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
933 /* Cannot simply cast and dereference in_ptr,
934 since it might not be aligned properly */
935 __int64 in;
936 memcpy(&in, in_ptr, sizeof(in));
937 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
938 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
939}
940
941void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800942_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800943{
944 /* XXX endianness */
945 __int64 out;
946 out = time_in + secs_between_epochs;
947 out = out * 10000000 + nsec_in / 100;
948 memcpy(out_ptr, &out, sizeof(out));
949}
950
951/* Below, we *know* that ugo+r is 0444 */
952#if _S_IREAD != 0400
953#error Unsupported C library
954#endif
955static int
956attributes_to_mode(DWORD attr)
957{
958 int m = 0;
959 if (attr & FILE_ATTRIBUTE_DIRECTORY)
960 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
961 else
962 m |= _S_IFREG;
963 if (attr & FILE_ATTRIBUTE_READONLY)
964 m |= 0444;
965 else
966 m |= 0666;
967 return m;
968}
969
Steve Dowerbf1f3762015-02-21 15:26:02 -0800970void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200971_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
972 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800973{
974 memset(result, 0, sizeof(*result));
975 result->st_mode = attributes_to_mode(info->dwFileAttributes);
976 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
977 result->st_dev = info->dwVolumeSerialNumber;
978 result->st_rdev = result->st_dev;
979 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
980 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
981 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
982 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100983 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -0700984 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
985 open other name surrogate reparse points without traversing them. To
986 detect/handle these, check st_file_attributes and st_reparse_tag. */
987 result->st_reparse_tag = reparse_tag;
988 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
989 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -0800990 /* first clear the S_IFMT bits */
991 result->st_mode ^= (result->st_mode & S_IFMT);
992 /* now set the bits that make this a symlink */
993 result->st_mode |= S_IFLNK;
994 }
995 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800996}
997#endif
998
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success.

   On Windows, handles that are not disk files only get st_mode filled
   (_S_IFCHR for character devices, _S_IFIFO for pipes); every other field
   is left zeroed. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD win_error = GetLastError();
        if (win_error != 0) {
            errno = winerror_to_errno(win_error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (file_type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001062
Victor Stinnere134a7f2015-03-30 10:09:31 +02001063/* Return information about a file.
1064
1065 On POSIX, use fstat().
1066
1067 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001068 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1069 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001070 #23152.
1071
1072 Raise an exception and return -1 on error. On Windows, set the last Windows
1073 error on error. On POSIX, set errno on error. Fill status and return 0 on
1074 success.
1075
Victor Stinner6f4fae82015-04-01 18:34:32 +02001076 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1077 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001078int
1079_Py_fstat(int fd, struct _Py_stat_struct *status)
1080{
1081 int res;
1082
Victor Stinner8a1be612016-03-14 22:07:55 +01001083 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001084
Victor Stinnere134a7f2015-03-30 10:09:31 +02001085 Py_BEGIN_ALLOW_THREADS
1086 res = _Py_fstat_noraise(fd, status);
1087 Py_END_ALLOW_THREADS
1088
1089 if (res != 0) {
1090#ifdef MS_WINDOWS
1091 PyErr_SetFromWindowsErr(0);
1092#else
1093 PyErr_SetFromErrno(PyExc_OSError);
1094#endif
1095 return -1;
1096 }
1097 return 0;
1098}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001099
Victor Stinner6672d0c2010-10-07 22:53:43 +00001100/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1101 call stat() otherwise. Only fill st_mode attribute on Windows.
1102
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001103 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1104 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001105
1106int
Victor Stinnera4a75952010-10-07 22:23:10 +00001107_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001108{
1109#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001110 int err;
1111 struct _stat wstatbuf;
1112
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001113#if USE_UNICODE_WCHAR_CACHE
1114 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1115#else /* USE_UNICODE_WCHAR_CACHE */
1116 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1117#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001118 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001119 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001120
Victor Stinneree587ea2011-11-17 00:51:38 +01001121 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001122 if (!err)
1123 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001124#if !USE_UNICODE_WCHAR_CACHE
1125 PyMem_Free(wpath);
1126#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001127 return err;
1128#else
1129 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001130 PyObject *bytes;
1131 char *cpath;
1132
1133 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001134 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001135 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001136
1137 /* check for embedded null bytes */
1138 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1139 Py_DECREF(bytes);
1140 return -2;
1141 }
1142
1143 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001144 Py_DECREF(bytes);
1145 return ret;
1146#endif
1147}
1148
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001149
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001150/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001151static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001152get_inheritable(int fd, int raise)
1153{
1154#ifdef MS_WINDOWS
1155 HANDLE handle;
1156 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001157
Segev Finer5e437fb2021-04-24 01:00:27 +03001158 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001159 if (handle == INVALID_HANDLE_VALUE) {
1160 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001161 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001162 return -1;
1163 }
1164
1165 if (!GetHandleInformation(handle, &flags)) {
1166 if (raise)
1167 PyErr_SetFromWindowsErr(0);
1168 return -1;
1169 }
1170
1171 return (flags & HANDLE_FLAG_INHERIT);
1172#else
1173 int flags;
1174
1175 flags = fcntl(fd, F_GETFD, 0);
1176 if (flags == -1) {
1177 if (raise)
1178 PyErr_SetFromErrno(PyExc_OSError);
1179 return -1;
1180 }
1181 return !(flags & FD_CLOEXEC);
1182#endif
1183}
1184
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;
    return get_inheritable(fd, raise);
}
1193
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001194
1195/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001196static int
1197set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1198{
1199#ifdef MS_WINDOWS
1200 HANDLE handle;
1201 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001202#else
1203#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1204 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001205 int request;
1206 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001207#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001208 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001209 int res;
1210#endif
1211
1212 /* atomic_flag_works can only be used to make the file descriptor
1213 non-inheritable */
1214 assert(!(atomic_flag_works != NULL && inheritable));
1215
1216 if (atomic_flag_works != NULL && !inheritable) {
1217 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001218 int isInheritable = get_inheritable(fd, raise);
1219 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001220 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001221 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001222 }
1223
1224 if (*atomic_flag_works)
1225 return 0;
1226 }
1227
1228#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03001229 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001230 if (handle == INVALID_HANDLE_VALUE) {
1231 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001232 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001233 return -1;
1234 }
1235
1236 if (inheritable)
1237 flags = HANDLE_FLAG_INHERIT;
1238 else
1239 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001240
1241 /* This check can be removed once support for Windows 7 ends. */
1242#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1243 GetFileType(handle) == FILE_TYPE_CHAR)
1244
1245 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1246 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001247 if (raise)
1248 PyErr_SetFromWindowsErr(0);
1249 return -1;
1250 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001251#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001252 return 0;
1253
Victor Stinnerdaf45552013-08-28 00:53:59 +02001254#else
Victor Stinner282124b2014-09-02 11:41:04 +02001255
1256#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001257 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001258 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001259 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1260 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001261 if (inheritable)
1262 request = FIONCLEX;
1263 else
1264 request = FIOCLEX;
1265 err = ioctl(fd, request, NULL);
1266 if (!err) {
1267 ioctl_works = 1;
1268 return 0;
1269 }
1270
cptpcrd7dc71c42021-01-20 09:05:51 -05001271#ifdef __linux__
1272 if (errno == EBADF) {
1273 // On Linux, ioctl(FIOCLEX) will fail with EBADF for O_PATH file descriptors
1274 // Fall through to the fcntl() path
1275 }
1276 else
1277#endif
Victor Stinner3116cc42016-05-19 16:46:18 +02001278 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001279 if (raise)
1280 PyErr_SetFromErrno(PyExc_OSError);
1281 return -1;
1282 }
1283 else {
1284 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1285 device". The ioctl is declared but not supported by the kernel.
1286 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001287 Illumos-based OS for example.
1288
1289 Issue #27057: When SELinux policy disallows ioctl it will fail
1290 with EACCES. While FIOCLEX is safe operation it may be
1291 unavailable because ioctl was denied altogether.
1292 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001293 ioctl_works = 0;
1294 }
1295 /* fallback to fcntl() if ioctl() does not work */
1296 }
1297#endif
1298
1299 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001300 flags = fcntl(fd, F_GETFD);
1301 if (flags < 0) {
1302 if (raise)
1303 PyErr_SetFromErrno(PyExc_OSError);
1304 return -1;
1305 }
1306
Victor Stinnera858bbd2016-04-17 16:51:52 +02001307 if (inheritable) {
1308 new_flags = flags & ~FD_CLOEXEC;
1309 }
1310 else {
1311 new_flags = flags | FD_CLOEXEC;
1312 }
1313
1314 if (new_flags == flags) {
1315 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1316 return 0;
1317 }
1318
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001319 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001320 if (res < 0) {
1321 if (raise)
1322 PyErr_SetFromErrno(PyExc_OSError);
1323 return -1;
1324 }
1325 return 0;
1326#endif
1327}
1328
/* Make the file descriptor non-inheritable without raising an exception
   (set_inheritable() is called with raise=0 and no atomic flag cache).
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, NULL);
}
1336
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the current inheritable flag of the
      file descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set
      *atomic_flag_works to 0 and make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1358
/* Same as _Py_set_inheritable() but on error, set errno and
   don't raise an exception (set_inheritable() is called with raise=0).
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1367
Victor Stinnera555cfc2015-03-18 00:22:14 +01001368static int
1369_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001370{
1371 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001372 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001373#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001374 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001375#endif
1376
1377#ifdef MS_WINDOWS
1378 flags |= O_NOINHERIT;
1379#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001380 atomic_flag_works = &_Py_open_cloexec_works;
1381 flags |= O_CLOEXEC;
1382#else
1383 atomic_flag_works = NULL;
1384#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001385
Victor Stinnera555cfc2015-03-18 00:22:14 +01001386 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001387 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1388 if (pathname_obj == NULL) {
1389 return -1;
1390 }
1391 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1392 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001393 return -1;
1394 }
1395
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001396 do {
1397 Py_BEGIN_ALLOW_THREADS
1398 fd = open(pathname, flags);
1399 Py_END_ALLOW_THREADS
1400 } while (fd < 0
1401 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001402 if (async_err) {
1403 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001404 return -1;
1405 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001406 if (fd < 0) {
1407 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1408 Py_DECREF(pathname_obj);
1409 return -1;
1410 }
1411 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001412 }
1413 else {
1414 fd = open(pathname, flags);
1415 if (fd < 0)
1416 return -1;
1417 }
1418
1419#ifndef MS_WINDOWS
1420 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001421 close(fd);
1422 return -1;
1423 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001424#endif
1425
Victor Stinnerdaf45552013-08-28 00:53:59 +02001426 return fd;
1427}
1428
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1446
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1458
Victor Stinnerdaf45552013-08-28 00:53:59 +02001459/* Open a file. Use _wfopen() on Windows, encode the path to the locale
Victor Stinnere42ccd22015-03-18 01:39:23 +01001460 encoding and use fopen() otherwise.
1461
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001462 The file descriptor is created non-inheritable.
1463
1464 If interrupted by a signal, fail with EINTR. */
Victor Stinner4e314432010-10-07 21:45:39 +00001465FILE *
1466_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1467{
Victor Stinner4e314432010-10-07 21:45:39 +00001468 FILE *f;
Steve Dowerb82e17e2019-05-23 08:45:22 -07001469 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1470 return NULL;
1471 }
Victor Stinnerdaf45552013-08-28 00:53:59 +02001472#ifndef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001473 char *cpath;
1474 char cmode[10];
1475 size_t r;
1476 r = wcstombs(cmode, mode, 10);
Victor Stinner99768342021-03-17 21:46:53 +01001477 if (r == DECODE_ERROR || r >= 10) {
Victor Stinner4e314432010-10-07 21:45:39 +00001478 errno = EINVAL;
1479 return NULL;
1480 }
Victor Stinner9dd76202017-12-21 16:20:32 +01001481 cpath = _Py_EncodeLocaleRaw(path, NULL);
1482 if (cpath == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001483 return NULL;
Victor Stinner9dd76202017-12-21 16:20:32 +01001484 }
Victor Stinner4e314432010-10-07 21:45:39 +00001485 f = fopen(cpath, cmode);
Victor Stinner9dd76202017-12-21 16:20:32 +01001486 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001487#else
Victor Stinnerdaf45552013-08-28 00:53:59 +02001488 f = _wfopen(path, mode);
Victor Stinner4e314432010-10-07 21:45:39 +00001489#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001490 if (f == NULL)
1491 return NULL;
1492 if (make_non_inheritable(fileno(f)) < 0) {
1493 fclose(f);
1494 return NULL;
1495 }
1496 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001497}
1498
Victor Stinnerdaf45552013-08-28 00:53:59 +02001499
1500/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001501 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001502
Victor Stinnere42ccd22015-03-18 01:39:23 +01001503 Return the new file object on success. Raise an exception and return NULL
1504 on error.
1505
1506 The file descriptor is created non-inheritable.
1507
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001508 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1509 except if the Python signal handler raises an exception.
1510
Victor Stinner6f4fae82015-04-01 18:34:32 +02001511 Release the GIL to call _wfopen() or fopen(). The caller must hold
1512 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001513FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001514_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001515{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001516 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001517 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001518#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001519 wchar_t wmode[10];
1520 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001521
Victor Stinnere42ccd22015-03-18 01:39:23 +01001522 assert(PyGILState_Check());
1523
Steve Dowerb82e17e2019-05-23 08:45:22 -07001524 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1525 return NULL;
1526 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001527 if (!PyUnicode_Check(path)) {
1528 PyErr_Format(PyExc_TypeError,
1529 "str file path expected under Windows, got %R",
1530 Py_TYPE(path));
1531 return NULL;
1532 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001533#if USE_UNICODE_WCHAR_CACHE
1534 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1535#else /* USE_UNICODE_WCHAR_CACHE */
1536 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1537#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001538 if (wpath == NULL)
1539 return NULL;
1540
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001541 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1542 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001543 if (usize == 0) {
1544 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001545#if !USE_UNICODE_WCHAR_CACHE
1546 PyMem_Free(wpath);
1547#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001548 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001549 }
Victor Stinner4e314432010-10-07 21:45:39 +00001550
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001551 do {
1552 Py_BEGIN_ALLOW_THREADS
1553 f = _wfopen(wpath, wmode);
1554 Py_END_ALLOW_THREADS
1555 } while (f == NULL
1556 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001557#if !USE_UNICODE_WCHAR_CACHE
1558 PyMem_Free(wpath);
1559#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001560#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001561 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001562 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001563
1564 assert(PyGILState_Check());
1565
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001566 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001567 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001568 path_bytes = PyBytes_AS_STRING(bytes);
1569
Steve Dowerb82e17e2019-05-23 08:45:22 -07001570 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001571 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001572 return NULL;
1573 }
1574
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001575 do {
1576 Py_BEGIN_ALLOW_THREADS
1577 f = fopen(path_bytes, mode);
1578 Py_END_ALLOW_THREADS
1579 } while (f == NULL
1580 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001581
Victor Stinner4e314432010-10-07 21:45:39 +00001582 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001583#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001584 if (async_err)
1585 return NULL;
1586
Victor Stinnere42ccd22015-03-18 01:39:23 +01001587 if (f == NULL) {
1588 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001589 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001590 }
1591
1592 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001593 fclose(f);
1594 return NULL;
1595 }
1596 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001597}
1598
Victor Stinner66aab0c2015-03-19 22:53:20 +01001599/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001600
1601 On success, return the number of read bytes, it can be lower than count.
1602 If the current file offset is at or past the end of file, no bytes are read,
1603 and read() returns zero.
1604
1605 On error, raise an exception, set errno and return -1.
1606
1607 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1608 If the Python signal handler raises an exception, the function returns -1
1609 (the syscall is not retried).
1610
1611 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001612Py_ssize_t
1613_Py_read(int fd, void *buf, size_t count)
1614{
1615 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001616 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001617 int async_err = 0;
1618
Victor Stinner8a1be612016-03-14 22:07:55 +01001619 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001620
Victor Stinner66aab0c2015-03-19 22:53:20 +01001621 /* _Py_read() must not be called with an exception set, otherwise the
1622 * caller may think that read() was interrupted by a signal and the signal
1623 * handler raised an exception. */
1624 assert(!PyErr_Occurred());
1625
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001626 if (count > _PY_READ_MAX) {
1627 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001628 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001629
Steve Dower8fc89802015-04-12 00:26:27 -04001630 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001631 do {
1632 Py_BEGIN_ALLOW_THREADS
1633 errno = 0;
1634#ifdef MS_WINDOWS
1635 n = read(fd, buf, (int)count);
1636#else
1637 n = read(fd, buf, count);
1638#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001639 /* save/restore errno because PyErr_CheckSignals()
1640 * and PyErr_SetFromErrno() can modify it */
1641 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001642 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001643 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001644 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001645 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001646
1647 if (async_err) {
1648 /* read() was interrupted by a signal (failed with EINTR)
1649 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001650 errno = err;
1651 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001652 return -1;
1653 }
1654 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001655 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001656 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001657 return -1;
1658 }
1659
1660 return n;
1661}
1662
Victor Stinner82c3e452015-04-01 18:34:45 +02001663static Py_ssize_t
1664_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001665{
1666 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001667 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001668 int async_err = 0;
1669
Steve Dower8fc89802015-04-12 00:26:27 -04001670 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001671#ifdef MS_WINDOWS
1672 if (count > 32767 && isatty(fd)) {
1673 /* Issue #11395: the Windows console returns an error (12: not
1674 enough space error) on writing into stdout if stdout mode is
1675 binary and the length is greater than 66,000 bytes (or less,
1676 depending on heap usage). */
1677 count = 32767;
1678 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001679#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001680 if (count > _PY_WRITE_MAX) {
1681 count = _PY_WRITE_MAX;
1682 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001683
Victor Stinner82c3e452015-04-01 18:34:45 +02001684 if (gil_held) {
1685 do {
1686 Py_BEGIN_ALLOW_THREADS
1687 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001688#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001689 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001690#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001691 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001692#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001693 /* save/restore errno because PyErr_CheckSignals()
1694 * and PyErr_SetFromErrno() can modify it */
1695 err = errno;
1696 Py_END_ALLOW_THREADS
1697 } while (n < 0 && err == EINTR &&
1698 !(async_err = PyErr_CheckSignals()));
1699 }
1700 else {
1701 do {
1702 errno = 0;
1703#ifdef MS_WINDOWS
1704 n = write(fd, buf, (int)count);
1705#else
1706 n = write(fd, buf, count);
1707#endif
1708 err = errno;
1709 } while (n < 0 && err == EINTR);
1710 }
Steve Dower8fc89802015-04-12 00:26:27 -04001711 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001712
1713 if (async_err) {
1714 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001715 and the Python signal handler raised an exception (if gil_held is
1716 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001717 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001718 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001719 return -1;
1720 }
1721 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001722 if (gil_held)
1723 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001724 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001725 return -1;
1726 }
1727
1728 return n;
1729}
1730
Victor Stinner82c3e452015-04-01 18:34:45 +02001731/* Write count bytes of buf into fd.
1732
1733 On success, return the number of written bytes, it can be lower than count
1734 including 0. On error, raise an exception, set errno and return -1.
1735
1736 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1737 If the Python signal handler raises an exception, the function returns -1
1738 (the syscall is not retried).
1739
1740 Release the GIL to call write(). The caller must hold the GIL. */
1741Py_ssize_t
1742_Py_write(int fd, const void *buf, size_t count)
1743{
Victor Stinner8a1be612016-03-14 22:07:55 +01001744 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001745
Victor Stinner82c3e452015-04-01 18:34:45 +02001746 /* _Py_write() must not be called with an exception set, otherwise the
1747 * caller may think that write() was interrupted by a signal and the signal
1748 * handler raised an exception. */
1749 assert(!PyErr_Occurred());
1750
1751 return _Py_write_impl(fd, buf, count, 1);
1752}
1753
1754/* Write count bytes of buf into fd.
1755 *
1756 * On success, return the number of written bytes, it can be lower than count
1757 * including 0. On error, set errno and return -1.
1758 *
1759 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1760 * without calling the Python signal handler. */
1761Py_ssize_t
1762_Py_write_noraise(int fd, const void *buf, size_t count)
1763{
1764 return _Py_write_impl(fd, buf, count, 0);
1765}
1766
Victor Stinner4e314432010-10-07 21:45:39 +00001767#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +00001768
1769/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinner1be0d112019-03-18 17:47:26 +01001770 the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001771
Victor Stinner1be0d112019-03-18 17:47:26 +01001772 Return -1 on encoding error, on readlink() error, if the internal buffer is
1773 too short, on decoding error, or if 'buf' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001774int
Victor Stinner1be0d112019-03-18 17:47:26 +01001775_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001776{
1777 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001778 char cbuf[MAXPATHLEN];
Victor Stinner03a8a562019-10-04 02:22:39 +02001779 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
Victor Stinner3f711f42010-10-16 22:47:37 +00001780 wchar_t *wbuf;
Victor Stinner03a8a562019-10-04 02:22:39 +02001781 Py_ssize_t res;
Victor Stinner4e314432010-10-07 21:45:39 +00001782 size_t r1;
1783
Victor Stinner9dd76202017-12-21 16:20:32 +01001784 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001785 if (cpath == NULL) {
1786 errno = EINVAL;
1787 return -1;
1788 }
Victor Stinner03a8a562019-10-04 02:22:39 +02001789 res = readlink(cpath, cbuf, cbuf_len);
Victor Stinner9dd76202017-12-21 16:20:32 +01001790 PyMem_RawFree(cpath);
Victor Stinner03a8a562019-10-04 02:22:39 +02001791 if (res == -1) {
Victor Stinner4e314432010-10-07 21:45:39 +00001792 return -1;
Victor Stinner03a8a562019-10-04 02:22:39 +02001793 }
1794 if ((size_t)res == cbuf_len) {
Victor Stinner4e314432010-10-07 21:45:39 +00001795 errno = EINVAL;
1796 return -1;
1797 }
1798 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001799 wbuf = Py_DecodeLocale(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +00001800 if (wbuf == NULL) {
1801 errno = EINVAL;
1802 return -1;
1803 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001804 /* wbuf must have space to store the trailing NUL character */
1805 if (buflen <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001806 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001807 errno = EINVAL;
1808 return -1;
1809 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001810 wcsncpy(buf, wbuf, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001811 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001812 return (int)r1;
1813}
1814#endif
1815
1816#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001817
1818/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001819 encoding, decode the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001820
Victor Stinner1be0d112019-03-18 17:47:26 +01001821 Return NULL on encoding error, realpath() error, decoding error
1822 or if 'resolved_path' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001823wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001824_Py_wrealpath(const wchar_t *path,
Victor Stinner1be0d112019-03-18 17:47:26 +01001825 wchar_t *resolved_path, size_t resolved_path_len)
Victor Stinner4e314432010-10-07 21:45:39 +00001826{
1827 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001828 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001829 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001830 char *res;
1831 size_t r;
Victor Stinner9dd76202017-12-21 16:20:32 +01001832 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001833 if (cpath == NULL) {
1834 errno = EINVAL;
1835 return NULL;
1836 }
1837 res = realpath(cpath, cresolved_path);
Victor Stinner9dd76202017-12-21 16:20:32 +01001838 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001839 if (res == NULL)
1840 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001841
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001842 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001843 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001844 errno = EINVAL;
1845 return NULL;
1846 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001847 /* wresolved_path must have space to store the trailing NUL character */
1848 if (resolved_path_len <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001849 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001850 errno = EINVAL;
1851 return NULL;
1852 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001853 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001854 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001855 return resolved_path;
1856}
1857#endif
1858
Victor Stinner3939c322019-06-25 15:02:43 +02001859
1860#ifndef MS_WINDOWS
1861int
1862_Py_isabs(const wchar_t *path)
1863{
1864 return (path[0] == SEP);
1865}
1866#endif
1867
1868
1869/* Get an absolute path.
1870 On error (ex: fail to get the current directory), return -1.
1871 On memory allocation failure, set *abspath_p to NULL and return 0.
1872 On success, return a newly allocated to *abspath_p to and return 0.
1873 The string must be freed by PyMem_RawFree(). */
1874int
1875_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1876{
1877#ifdef MS_WINDOWS
1878 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1879 DWORD result;
1880
1881 result = GetFullPathNameW(path,
1882 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1883 NULL);
1884 if (!result) {
1885 return -1;
1886 }
1887
1888 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1889 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1890 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1891 }
1892 else {
1893 woutbufp = NULL;
1894 }
1895 if (!woutbufp) {
1896 *abspath_p = NULL;
1897 return 0;
1898 }
1899
1900 result = GetFullPathNameW(path, result, woutbufp, NULL);
1901 if (!result) {
1902 PyMem_RawFree(woutbufp);
1903 return -1;
1904 }
1905 }
1906
1907 if (woutbufp != woutbuf) {
1908 *abspath_p = woutbufp;
1909 return 0;
1910 }
1911
1912 *abspath_p = _PyMem_RawWcsdup(woutbufp);
1913 return 0;
1914#else
1915 if (_Py_isabs(path)) {
1916 *abspath_p = _PyMem_RawWcsdup(path);
1917 return 0;
1918 }
1919
1920 wchar_t cwd[MAXPATHLEN + 1];
1921 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
1922 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
1923 /* unable to get the current directory */
1924 return -1;
1925 }
1926
1927 size_t cwd_len = wcslen(cwd);
1928 size_t path_len = wcslen(path);
1929 size_t len = cwd_len + 1 + path_len + 1;
1930 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1931 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
1932 }
1933 else {
1934 *abspath_p = NULL;
1935 }
1936 if (*abspath_p == NULL) {
1937 return 0;
1938 }
1939
1940 wchar_t *abspath = *abspath_p;
1941 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
1942 abspath += cwd_len;
1943
1944 *abspath = (wchar_t)SEP;
1945 abspath++;
1946
1947 memcpy(abspath, path, path_len * sizeof(wchar_t));
1948 abspath += path_len;
1949
1950 *abspath = 0;
1951 return 0;
1952#endif
1953}
1954
1955
Victor Stinnerfaddaed2019-03-19 02:58:14 +01001956/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001957 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001958
Victor Stinner1be0d112019-03-18 17:47:26 +01001959 Return NULL on getcwd() error, on decoding error, or if 'buf' is
1960 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001961wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01001962_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001963{
1964#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01001965 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1966 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00001967#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001968 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001969 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001970 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001971
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001972 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001973 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001974 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001975 if (wname == NULL)
1976 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01001977 /* wname must have space to store the trailing NUL character */
1978 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001979 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001980 return NULL;
1981 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001982 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001983 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001984 return buf;
1985#endif
1986}
1987
Victor Stinnerdaf45552013-08-28 00:53:59 +02001988/* Duplicate a file descriptor. The new file descriptor is created as
1989 non-inheritable. Return a new file descriptor on success, raise an OSError
1990 exception and return -1 on error.
1991
1992 The GIL is released to call dup(). The caller must hold the GIL. */
1993int
1994_Py_dup(int fd)
1995{
1996#ifdef MS_WINDOWS
1997 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001998#endif
1999
Victor Stinner8a1be612016-03-14 22:07:55 +01002000 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01002001
Victor Stinnerdaf45552013-08-28 00:53:59 +02002002#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03002003 handle = _Py_get_osfhandle(fd);
2004 if (handle == INVALID_HANDLE_VALUE)
Victor Stinnerdaf45552013-08-28 00:53:59 +02002005 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002006
Victor Stinnerdaf45552013-08-28 00:53:59 +02002007 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002008 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002009 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002010 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002011 Py_END_ALLOW_THREADS
2012 if (fd < 0) {
2013 PyErr_SetFromErrno(PyExc_OSError);
2014 return -1;
2015 }
2016
Zackery Spytz28fca0c2019-06-17 01:17:14 -06002017 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2018 _Py_BEGIN_SUPPRESS_IPH
2019 close(fd);
2020 _Py_END_SUPPRESS_IPH
2021 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002022 }
2023#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2024 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002025 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002026 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002027 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002028 Py_END_ALLOW_THREADS
2029 if (fd < 0) {
2030 PyErr_SetFromErrno(PyExc_OSError);
2031 return -1;
2032 }
2033
2034#else
2035 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002036 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002037 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002038 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002039 Py_END_ALLOW_THREADS
2040 if (fd < 0) {
2041 PyErr_SetFromErrno(PyExc_OSError);
2042 return -1;
2043 }
2044
2045 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002046 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002047 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002048 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002049 return -1;
2050 }
2051#endif
2052 return fd;
2053}
2054
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002055#ifndef MS_WINDOWS
2056/* Get the blocking mode of the file descriptor.
2057 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2058 raise an exception and return -1 on error. */
2059int
2060_Py_get_blocking(int fd)
2061{
Steve Dower8fc89802015-04-12 00:26:27 -04002062 int flags;
2063 _Py_BEGIN_SUPPRESS_IPH
2064 flags = fcntl(fd, F_GETFL, 0);
2065 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002066 if (flags < 0) {
2067 PyErr_SetFromErrno(PyExc_OSError);
2068 return -1;
2069 }
2070
2071 return !(flags & O_NONBLOCK);
2072}
2073
2074/* Set the blocking mode of the specified file descriptor.
2075
2076 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2077 otherwise.
2078
2079 Return 0 on success, raise an exception and return -1 on error. */
2080int
2081_Py_set_blocking(int fd, int blocking)
2082{
pxinwr06afac62020-12-08 04:41:12 +08002083/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2084 Use fcntl() instead. */
2085#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002086 int arg = !blocking;
2087 if (ioctl(fd, FIONBIO, &arg) < 0)
2088 goto error;
2089#else
2090 int flags, res;
2091
Steve Dower8fc89802015-04-12 00:26:27 -04002092 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002093 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002094 if (flags >= 0) {
2095 if (blocking)
2096 flags = flags & (~O_NONBLOCK);
2097 else
2098 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002099
Steve Dower8fc89802015-04-12 00:26:27 -04002100 res = fcntl(fd, F_SETFL, flags);
2101 } else {
2102 res = -1;
2103 }
2104 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002105
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002106 if (res < 0)
2107 goto error;
2108#endif
2109 return 0;
2110
2111error:
2112 PyErr_SetFromErrno(PyExc_OSError);
2113 return -1;
2114}
Segev Finer5e437fb2021-04-24 01:00:27 +03002115#else /* MS_WINDOWS */
2116void*
2117_Py_get_osfhandle_noraise(int fd)
2118{
2119 void *handle;
2120 _Py_BEGIN_SUPPRESS_IPH
2121 handle = (void*)_get_osfhandle(fd);
2122 _Py_END_SUPPRESS_IPH
2123 return handle;
2124}
Victor Stinnercb064fc2018-01-15 15:58:02 +01002125
Segev Finer5e437fb2021-04-24 01:00:27 +03002126void*
2127_Py_get_osfhandle(int fd)
2128{
2129 void *handle = _Py_get_osfhandle_noraise(fd);
2130 if (handle == INVALID_HANDLE_VALUE)
2131 PyErr_SetFromErrno(PyExc_OSError);
2132
2133 return handle;
2134}
2135
2136int
2137_Py_open_osfhandle_noraise(void *handle, int flags)
2138{
2139 int fd;
2140 _Py_BEGIN_SUPPRESS_IPH
2141 fd = _open_osfhandle((intptr_t)handle, flags);
2142 _Py_END_SUPPRESS_IPH
2143 return fd;
2144}
2145
2146int
2147_Py_open_osfhandle(void *handle, int flags)
2148{
2149 int fd = _Py_open_osfhandle_noraise(handle, flags);
2150 if (fd == -1)
2151 PyErr_SetFromErrno(PyExc_OSError);
2152
2153 return fd;
2154}
2155#endif /* MS_WINDOWS */
Victor Stinnercb064fc2018-01-15 15:58:02 +01002156
2157int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002158_Py_GetLocaleconvNumeric(struct lconv *lc,
2159 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002160{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002161 assert(decimal_point != NULL);
2162 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002163
TIGirardif2312032020-10-20 08:39:52 -03002164#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002165 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002166 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002167 change_locale = 1;
2168 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002169 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002170 change_locale = 1;
2171 }
2172
2173 /* Keep a copy of the LC_CTYPE locale */
2174 char *oldloc = NULL, *loc = NULL;
2175 if (change_locale) {
2176 oldloc = setlocale(LC_CTYPE, NULL);
2177 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002178 PyErr_SetString(PyExc_RuntimeWarning,
2179 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002180 return -1;
2181 }
2182
2183 oldloc = _PyMem_Strdup(oldloc);
2184 if (!oldloc) {
2185 PyErr_NoMemory();
2186 return -1;
2187 }
2188
2189 loc = setlocale(LC_NUMERIC, NULL);
2190 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2191 loc = NULL;
2192 }
2193
2194 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002195 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002196 if LC_NUMERIC locale is different than LC_CTYPE locale and
2197 decimal_point and/or thousands_sep are non-ASCII or longer than
2198 1 byte */
2199 setlocale(LC_CTYPE, loc);
2200 }
2201 }
2202
TIGirardif2312032020-10-20 08:39:52 -03002203#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2204#else /* MS_WINDOWS */
2205/* Use _W_* fields of Windows strcut lconv */
2206#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2207#endif /* MS_WINDOWS */
2208
Victor Stinner02e6bf72018-11-20 16:20:16 +01002209 int res = -1;
2210
TIGirardif2312032020-10-20 08:39:52 -03002211 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002212 if (*decimal_point == NULL) {
2213 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002214 }
2215
TIGirardif2312032020-10-20 08:39:52 -03002216 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002217 if (*thousands_sep == NULL) {
2218 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002219 }
2220
2221 res = 0;
2222
Victor Stinner02e6bf72018-11-20 16:20:16 +01002223done:
TIGirardif2312032020-10-20 08:39:52 -03002224#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002225 if (loc != NULL) {
2226 setlocale(LC_CTYPE, oldloc);
2227 }
2228 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002229#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002230 return res;
TIGirardif2312032020-10-20 08:39:52 -03002231
2232#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002233}
Kyle Evans79925792020-10-13 15:04:44 -05002234
/* Our selection logic for which function to use is as follows:
 * 1. If close_range(2) is available, always prefer that; it's better for
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
 *    the entire fd space and simply doing nothing for those outside the range.
 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
 *    closing up to sysconf(_SC_OPEN_MAX).
 * 2a. Fall back to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
 *    as that will be more performant if the range happens to have any chunk of
 *    non-opened fd in the middle.
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
 */
2246#ifdef __FreeBSD__
2247# define USE_CLOSEFROM
2248#endif /* __FreeBSD__ */
2249
2250#ifdef HAVE_FDWALK
2251# define USE_FDWALK
2252#endif /* HAVE_FDWALK */
2253
2254#ifdef USE_FDWALK
/* fdwalk() callback. 'lohi' points to two ints {lo, hi}: close 'fd' when
   lo <= fd < hi. Return 1 once fd >= hi, 0 otherwise. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;

    if (fd >= bounds[1]) {
        return 1;
    }
    if (fd >= bounds[0]) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2270#endif /* USE_FDWALK */
2271
2272/* Closes all file descriptors in [first, last], ignoring errors. */
2273void
2274_Py_closerange(int first, int last)
2275{
2276 first = Py_MAX(first, 0);
2277 _Py_BEGIN_SUPPRESS_IPH
2278#ifdef HAVE_CLOSE_RANGE
2279 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2280 /* Any errors encountered while closing file descriptors are ignored;
2281 * ENOSYS means no kernel support, though,
2282 * so we'll fallback to the other methods. */
2283 }
2284 else
2285#endif /* HAVE_CLOSE_RANGE */
2286#ifdef USE_CLOSEFROM
2287 if (last >= sysconf(_SC_OPEN_MAX)) {
2288 /* Any errors encountered while closing file descriptors are ignored */
2289 closefrom(first);
2290 }
2291 else
2292#endif /* USE_CLOSEFROM */
2293#ifdef USE_FDWALK
2294 {
2295 int lohi[2];
2296 lohi[0] = first;
2297 lohi[1] = last + 1;
2298 fdwalk(_fdwalk_close_func, lohi);
2299 }
2300#else
2301 {
2302 for (int i = first; i <= last; i++) {
2303 /* Ignore errors */
2304 (void)close(i);
2305 }
2306 }
2307#endif /* USE_FDWALK */
2308 _Py_END_SUPPRESS_IPH
2309}