blob: 9e732ddca55cece96d308069c8812ae0b2b2f0dd [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
Jakub Kulík9032cf52021-04-30 15:21:42 +020021#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
22#include <iconv.h>
23#endif
24
Victor Stinnerdaf45552013-08-28 00:53:59 +020025#ifdef HAVE_FCNTL_H
26#include <fcntl.h>
27#endif /* HAVE_FCNTL_H */
28
#ifdef O_CLOEXEC
/* Tri-state cache telling whether open() honours the O_CLOEXEC flag:

   -1: not determined yet
    0: open() ignores the O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() honours the O_CLOEXEC flag, close-on-exec is set

   The cache is shared by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
40
// Upper bound of the Unicode code point range.
// Keep in sync with the same constant in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// Error sentinels returned by mbstowcs() and mbrtowc()
static const size_t DECODE_ERROR = (size_t)-1;
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
47
Victor Stinner3d4226a2018-08-29 22:21:32 +020048
49static int
50get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
51{
52 switch (errors)
53 {
54 case _Py_ERROR_STRICT:
55 *surrogateescape = 0;
56 return 0;
57 case _Py_ERROR_SURROGATEESCAPE:
58 *surrogateescape = 1;
59 return 0;
60 default:
61 return -1;
62 }
63}
64
65
Brett Cannonefb00c02012-02-29 18:31:31 -050066PyObject *
67_Py_device_encoding(int fd)
68{
Steve Dower8fc89802015-04-12 00:26:27 -040069 int valid;
Miss Islington (bot)23c46772021-09-09 09:35:43 -070070 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -040071 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070072 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040073 _Py_END_SUPPRESS_IPH
Miss Islington (bot)23c46772021-09-09 09:35:43 -070074 Py_END_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -040075 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050076 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040077
Victor Stinner14b9b112013-06-25 00:37:25 +020078#if defined(MS_WINDOWS)
Victor Stinner35297182020-11-04 11:20:10 +010079 UINT cp;
Brett Cannonefb00c02012-02-29 18:31:31 -050080 if (fd == 0)
81 cp = GetConsoleCP();
82 else if (fd == 1 || fd == 2)
83 cp = GetConsoleOutputCP();
84 else
85 cp = 0;
86 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
87 has no console */
Victor Stinner35297182020-11-04 11:20:10 +010088 if (cp == 0) {
89 Py_RETURN_NONE;
Brett Cannonefb00c02012-02-29 18:31:31 -050090 }
Victor Stinner35297182020-11-04 11:20:10 +010091
92 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
93#else
94 return _Py_GetLocaleEncodingObject();
Brett Cannonefb00c02012-02-29 18:31:31 -050095#endif
Brett Cannonefb00c02012-02-29 18:31:31 -050096}
97
Victor Stinner99768342021-03-17 21:46:53 +010098
99static size_t
100is_valid_wide_char(wchar_t ch)
101{
Jakub Kulík9032cf52021-04-30 15:21:42 +0200102#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
103 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
104 for non-Unicode locales, which makes values higher than MAX_UNICODE
105 possibly valid. */
106 return 1;
107#endif
Victor Stinner99768342021-03-17 21:46:53 +0100108 if (Py_UNICODE_IS_SURROGATE(ch)) {
109 // Reject lone surrogate characters
110 return 0;
111 }
112 if (ch > MAX_UNICODE) {
113 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
114 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
115 // it creates characters outside the [U+0000; U+10ffff] range:
116 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
117 return 0;
118 }
119 return 1;
120}
121
122
123static size_t
124_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
125{
126 size_t count = mbstowcs(dest, src, n);
127 if (dest != NULL && count != DECODE_ERROR) {
128 for (size_t i=0; i < count; i++) {
129 wchar_t ch = dest[i];
130 if (!is_valid_wide_char(ch)) {
131 return DECODE_ERROR;
132 }
133 }
134 }
135 return count;
136}
137
138
#ifdef HAVE_MBRTOWC
/* mbrtowc() wrapper which validates the decoded character.

   pwc must not be NULL. Return what mbrtowc() returns, except that
   DECODE_ERROR is returned when a character was decoded (result other than
   0, DECODE_ERROR and INCOMPLETE_CHARACTER) but is rejected by
   is_valid_wide_char(). */
static size_t
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
{
    assert(pwc != NULL);

    size_t nbytes = mbrtowc(pwc, str, len, pmbs);
    if (nbytes == 0
        || nbytes == DECODE_ERROR
        || nbytes == INCOMPLETE_CHARACTER)
    {
        /* end of string or error: passed through unchanged */
        return nbytes;
    }
    return is_valid_wide_char(*pwc) ? nbytes : DECODE_ERROR;
}
#endif
153
154
Victor Stinnere2510952019-05-02 11:28:57 -0400155#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100156
157#define USE_FORCE_ASCII
158
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100159extern int _Py_normalize_encoding(const char *, char *, size_t);
160
/* Workaround for a locale encoding issue of FreeBSD and OpenIndiana with the
   C locale and the POSIX locale: nl_langinfo(CODESET) announces an alias of
   the ASCII encoding, whereas the mbstowcs() and wcstombs() functions use
   the ISO-8859-1 encoding. This matters because os.fsencode() and
   os.fsdecode() use the locale.getpreferredencoding() codec: if command line
   arguments are decoded by mbstowcs() and encoded back by os.fsencode(), a
   UnicodeEncodeError is raised instead of getting back the original byte
   string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias of ASCII),
   and at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. The ASCII
   encoding is forced in this case as well.

   Values of force_ascii:

    1: the workaround is used: encoding goes through encode_ascii() and
       decoding goes through decode_ascii()
    0: the workaround is not used: encoding relies on wcstombs() and
       decoding relies on mbstowcs()
   -1: unknown, check_force_ascii() must be called to get the value
*/
static int force_ascii = -1;
189
/* Decide whether the ASCII workaround has to be used for the current
   LC_CTYPE locale.

   Return 1 if ASCII must be forced, 0 otherwise. On error (the locale or
   the codeset cannot be read or normalized), return 1 as well. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* The source is NUL-terminated so that mbstowcs() cannot look past
           the end of the buffer while decoding the byte. */
        const unsigned char ch[2] = {0xA7, 0};
        wchar_t wch;
        size_t res = _Py_mbstowcs(&wch, (const char *)ch, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char *const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char *const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* The probed byte is followed by a NUL byte so that mbstowcs()
           cannot look past the end of the buffer if the byte starts a
           multibyte sequence. */
        const unsigned char ch[2] = {(unsigned char)i, 0};
        wchar_t wch[1];

        size_t res = _Py_mbstowcs(wch, (const char *)ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
289
Victor Stinnerd500e532018-08-28 17:27:36 +0200290
291int
292_Py_GetForceASCII(void)
293{
294 if (force_ascii == -1) {
295 force_ascii = check_force_ascii();
296 }
297 return force_ascii;
298}
299
300
Victor Stinner353933e2018-11-23 13:08:26 +0100301void
302_Py_ResetForceASCII(void)
303{
304 force_ascii = -1;
305}
306
307
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100308static int
309encode_ascii(const wchar_t *text, char **str,
310 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200311 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100312{
313 char *result = NULL, *out;
314 size_t len, i;
315 wchar_t ch;
316
Victor Stinner3d4226a2018-08-29 22:21:32 +0200317 int surrogateescape;
318 if (get_surrogateescape(errors, &surrogateescape) < 0) {
319 return -3;
320 }
321
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100322 len = wcslen(text);
323
Victor Stinner9bee3292017-12-21 16:49:13 +0100324 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100325 if (raw_malloc) {
326 result = PyMem_RawMalloc(len + 1);
327 }
328 else {
329 result = PyMem_Malloc(len + 1);
330 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100331 if (result == NULL) {
332 return -1;
333 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100334
335 out = result;
336 for (i=0; i<len; i++) {
337 ch = text[i];
338
339 if (ch <= 0x7f) {
340 /* ASCII character */
341 *out++ = (char)ch;
342 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100343 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100344 /* UTF-8b surrogate */
345 *out++ = (char)(ch - 0xdc00);
346 }
347 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100348 if (raw_malloc) {
349 PyMem_RawFree(result);
350 }
351 else {
352 PyMem_Free(result);
353 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100354 if (error_pos != NULL) {
355 *error_pos = i;
356 }
357 if (reason) {
358 *reason = "encoding error";
359 }
360 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100361 }
362 }
363 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100364 *str = result;
365 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100366}
Victor Stinnerd500e532018-08-28 17:27:36 +0200367#else
/* Variant used when the ASCII workaround is compiled out:
   ASCII is never forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100373
/* Variant used when the ASCII workaround is compiled out:
   there is no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* intentionally empty */
}
Victor Stinnere2510952019-05-02 11:28:57 -0400379#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100380
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100381
382#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
383static int
384decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200385 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100386{
387 wchar_t *res;
388 unsigned char *in;
389 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600390 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100391
Victor Stinner3d4226a2018-08-29 22:21:32 +0200392 int surrogateescape;
393 if (get_surrogateescape(errors, &surrogateescape) < 0) {
394 return -3;
395 }
396
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100397 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
398 return -1;
399 }
400 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
401 if (!res) {
402 return -1;
403 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100404
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100405 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100406 for (in = (unsigned char*)arg; *in; in++) {
407 unsigned char ch = *in;
408 if (ch < 128) {
409 *out++ = ch;
410 }
411 else {
412 if (!surrogateescape) {
413 PyMem_RawFree(res);
414 if (wlen) {
415 *wlen = in - (unsigned char*)arg;
416 }
417 if (reason) {
418 *reason = "decoding error";
419 }
420 return -2;
421 }
422 *out++ = 0xdc00 + ch;
423 }
424 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100425 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100426
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100427 if (wlen != NULL) {
428 *wlen = out - res;
429 }
430 *wstr = res;
431 return 0;
432}
433#endif /* !HAVE_MBRTOWC */
434
435static int
436decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200437 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000438{
439 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100440 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000441 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200442#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000443 unsigned char *in;
444 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000445 mbstate_t mbs;
446#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100447
Victor Stinner3d4226a2018-08-29 22:21:32 +0200448 int surrogateescape;
449 if (get_surrogateescape(errors, &surrogateescape) < 0) {
450 return -3;
451 }
452
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100453#ifdef HAVE_BROKEN_MBSTOWCS
454 /* Some platforms have a broken implementation of
455 * mbstowcs which does not count the characters that
456 * would result from conversion. Use an upper bound.
457 */
458 argsize = strlen(arg);
459#else
Victor Stinner99768342021-03-17 21:46:53 +0100460 argsize = _Py_mbstowcs(NULL, arg, 0);
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100461#endif
Victor Stinner99768342021-03-17 21:46:53 +0100462 if (argsize != DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100463 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
464 return -1;
465 }
466 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
467 if (!res) {
468 return -1;
469 }
470
Victor Stinner99768342021-03-17 21:46:53 +0100471 count = _Py_mbstowcs(res, arg, argsize + 1);
472 if (count != DECODE_ERROR) {
473 *wstr = res;
474 if (wlen != NULL) {
475 *wlen = count;
Victor Stinner168e1172010-10-16 23:16:16 +0000476 }
Victor Stinner99768342021-03-17 21:46:53 +0100477 return 0;
Victor Stinner4e314432010-10-07 21:45:39 +0000478 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200479 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000480 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100481
Victor Stinner4e314432010-10-07 21:45:39 +0000482 /* Conversion failed. Fall back to escaping with surrogateescape. */
483#ifdef HAVE_MBRTOWC
484 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
485
486 /* Overallocate; as multi-byte characters are in the argument, the
487 actual output could use less memory. */
488 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100489 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
490 return -1;
491 }
492 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
493 if (!res) {
494 return -1;
495 }
496
Victor Stinner4e314432010-10-07 21:45:39 +0000497 in = (unsigned char*)arg;
498 out = res;
499 memset(&mbs, 0, sizeof mbs);
500 while (argsize) {
Victor Stinner99768342021-03-17 21:46:53 +0100501 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100502 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000503 /* Reached end of string; null char stored. */
504 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100505 }
506
Victor Stinner99768342021-03-17 21:46:53 +0100507 if (converted == INCOMPLETE_CHARACTER) {
Victor Stinner4e314432010-10-07 21:45:39 +0000508 /* Incomplete character. This should never happen,
509 since we provide everything that we have -
510 unless there is a bug in the C library, or I
511 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100512 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000513 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100514
Victor Stinner99768342021-03-17 21:46:53 +0100515 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100516 if (!surrogateescape) {
517 goto decode_error;
518 }
519
Victor Stinner99768342021-03-17 21:46:53 +0100520 /* Decoding error. Escape as UTF-8b, and start over in the initial
521 shift state. */
Victor Stinner4e314432010-10-07 21:45:39 +0000522 *out++ = 0xdc00 + *in++;
523 argsize--;
524 memset(&mbs, 0, sizeof mbs);
525 continue;
526 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100527
Victor Stinner99768342021-03-17 21:46:53 +0100528 // _Py_mbrtowc() reject lone surrogate characters
529 assert(!Py_UNICODE_IS_SURROGATE(*out));
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100530
Victor Stinner4e314432010-10-07 21:45:39 +0000531 /* successfully converted some bytes */
532 in += converted;
533 argsize -= converted;
534 out++;
535 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100536 if (wlen != NULL) {
537 *wlen = out - res;
538 }
539 *wstr = res;
540 return 0;
541
542decode_error:
543 PyMem_RawFree(res);
544 if (wlen) {
545 *wlen = in - (unsigned char*)arg;
546 }
547 if (reason) {
548 *reason = "decoding error";
549 }
550 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100551#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000552 /* Cannot use C locale for escaping; manually escape as if charset
553 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
554 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200555 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100556#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100557}
558
559
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100560/* Decode a byte string from the locale encoding.
561
562 Use the strict error handler if 'surrogateescape' is zero. Use the
563 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
564 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
565 can be decoded as a surrogate character, escape the bytes using the
566 surrogateescape error handler instead of decoding them.
567
Ville Skyttä61f82e02018-04-20 23:08:45 +0300568 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100569 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
570 the number of wide characters excluding the null character into *wlen.
571
572 On memory allocation failure, return -1.
573
574 On decoding error, return -2. If wlen is not NULL, write the start of
575 invalid byte sequence in the input string into *wlen. If reason is not NULL,
576 write the decoding error message into *reason.
577
Victor Stinner3d4226a2018-08-29 22:21:32 +0200578 Return -3 if the error handler 'errors' is not supported.
579
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100580 Use the Py_EncodeLocaleEx() function to encode the character string back to
581 a byte string. */
582int
583_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
584 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200585 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100586{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100587 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400588#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100589 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200590 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100591#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200592 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100593#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100594 }
595
Victor Stinnere2510952019-05-02 11:28:57 -0400596#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100597 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200598 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100599#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200600 int use_utf8 = (Py_UTF8Mode == 1);
601#ifdef MS_WINDOWS
602 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
603#endif
604 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200605 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
606 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100607 }
608
609#ifdef USE_FORCE_ASCII
610 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100611 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100612 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100613
614 if (force_ascii) {
615 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200616 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100617 }
618#endif
619
Victor Stinner3d4226a2018-08-29 22:21:32 +0200620 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400621#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100622}
623
624
Victor Stinner91106cd2017-12-13 12:29:09 +0100625/* Decode a byte string from the locale encoding with the
626 surrogateescape error handler: undecodable bytes are decoded as characters
627 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
628 character, escape the bytes using the surrogateescape error handler instead
629 of decoding them.
630
631 Return a pointer to a newly allocated wide character string, use
632 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
633 wide characters excluding the null character into *size
634
635 Return NULL on decoding error or memory allocation error. If *size* is not
636 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
637 decoding error.
638
639 Decoding errors should never happen, unless there is a bug in the C
640 library.
641
642 Use the Py_EncodeLocale() function to encode the character string back to a
643 byte string. */
644wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100645Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100646{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100647 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200648 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
649 NULL, 0,
650 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100651 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200652 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100653 if (wlen != NULL) {
654 *wlen = (size_t)res;
655 }
656 return NULL;
657 }
658 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100659}
Victor Stinner91106cd2017-12-13 12:29:09 +0100660
Victor Stinner91106cd2017-12-13 12:29:09 +0100661
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100662static int
663encode_current_locale(const wchar_t *text, char **str,
664 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200665 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100666{
Victor Stinner4e314432010-10-07 21:45:39 +0000667 const size_t len = wcslen(text);
668 char *result = NULL, *bytes = NULL;
669 size_t i, size, converted;
670 wchar_t c, buf[2];
671
Victor Stinner3d4226a2018-08-29 22:21:32 +0200672 int surrogateescape;
673 if (get_surrogateescape(errors, &surrogateescape) < 0) {
674 return -3;
675 }
676
Victor Stinner4e314432010-10-07 21:45:39 +0000677 /* The function works in two steps:
678 1. compute the length of the output buffer in bytes (size)
679 2. outputs the bytes */
680 size = 0;
681 buf[1] = 0;
682 while (1) {
683 for (i=0; i < len; i++) {
684 c = text[i];
685 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100686 if (!surrogateescape) {
687 goto encode_error;
688 }
Victor Stinner4e314432010-10-07 21:45:39 +0000689 /* UTF-8b surrogate */
690 if (bytes != NULL) {
691 *bytes++ = c - 0xdc00;
692 size--;
693 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100694 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000695 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100696 }
Victor Stinner4e314432010-10-07 21:45:39 +0000697 continue;
698 }
699 else {
700 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100701 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000702 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100703 }
704 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000705 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100706 }
Victor Stinner99768342021-03-17 21:46:53 +0100707 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100708 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000709 }
710 if (bytes != NULL) {
711 bytes += converted;
712 size -= converted;
713 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100714 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000715 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100716 }
Victor Stinner4e314432010-10-07 21:45:39 +0000717 }
718 }
719 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100720 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000721 break;
722 }
723
724 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100725 if (raw_malloc) {
726 result = PyMem_RawMalloc(size);
727 }
728 else {
729 result = PyMem_Malloc(size);
730 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100731 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100732 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100733 }
Victor Stinner4e314432010-10-07 21:45:39 +0000734 bytes = result;
735 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100736 *str = result;
737 return 0;
738
739encode_error:
740 if (raw_malloc) {
741 PyMem_RawFree(result);
742 }
743 else {
744 PyMem_Free(result);
745 }
746 if (error_pos != NULL) {
747 *error_pos = i;
748 }
749 if (reason) {
750 *reason = "encoding error";
751 }
752 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100753}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100754
Victor Stinner3d4226a2018-08-29 22:21:32 +0200755
756/* Encode a string to the locale encoding.
757
758 Parameters:
759
760 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
761 of PyMem_Malloc().
762 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
763 Python filesystem encoding.
764 * errors: error handler like "strict" or "surrogateescape".
765
766 Return value:
767
768 0: success, *str is set to a newly allocated decoded string.
769 -1: memory allocation failure
770 -2: encoding error, set *error_pos and *reason (if set).
771 -3: the error handler 'errors' is not supported.
772 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100773static int
774encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
775 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200776 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100777{
778 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400779#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100780 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200781 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100782#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100783 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200784 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100785#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100786 }
787
Victor Stinnere2510952019-05-02 11:28:57 -0400788#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100789 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200790 raw_malloc, errors);
791#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200792 int use_utf8 = (Py_UTF8Mode == 1);
793#ifdef MS_WINDOWS
794 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
795#endif
796 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100797 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200798 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100799 }
800
801#ifdef USE_FORCE_ASCII
802 if (force_ascii == -1) {
803 force_ascii = check_force_ascii();
804 }
805
806 if (force_ascii) {
807 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200808 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100809 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100810#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100811
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100812 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200813 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400814#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100815}
816
Victor Stinner9dd76202017-12-21 16:20:32 +0100817static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100818encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100819 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100820{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100821 char *str;
822 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200823 raw_malloc, current_locale,
824 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100825 if (res != -2 && error_pos) {
826 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100827 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100828 if (res != 0) {
829 return NULL;
830 }
831 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100832}
833
/* Encode a wide character string to the locale encoding using the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   The result is a newly allocated byte string which must be released with
   PyMem_Free().  NULL is returned on encoding or memory allocation error.

   When error_pos is not NULL, *error_pos receives (size_t)-1 on success, or
   the index of the invalid character on encoding error.

   Py_DecodeLocale() performs the reverse conversion (bytes back to a wide
   character string). */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    return encode_locale(text, error_pos,
                         0 /* raw_malloc */,
                         0 /* current_locale */);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100851
Victor Stinner91106cd2017-12-13 12:29:09 +0100852
/* Same as Py_EncodeLocale(), except that the returned string must be
   released with PyMem_RawFree() rather than PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    return encode_locale(text, error_pos,
                         1 /* raw_malloc */,
                         0 /* current_locale */);
}
860
861
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100862int
863_Py_EncodeLocaleEx(const wchar_t *text, char **str,
864 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200865 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100866{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100867 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200868 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000869}
870
Victor Stinner6672d0c2010-10-07 22:53:43 +0000871
Victor Stinner82458b62020-11-01 20:59:35 +0100872// Get the current locale encoding name:
873//
874// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
875// - Return "UTF-8" if the UTF-8 Mode is enabled
876// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100877// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100878// - Otherwise, return nl_langinfo(CODESET).
879//
Victor Stinnere662c392020-11-01 23:07:23 +0100880// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100881//
Victor Stinner710e8262020-10-31 01:02:09 +0100882// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100883wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100884_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100885{
886#ifdef _Py_FORCE_UTF8_LOCALE
887 // On Android langinfo.h and CODESET are missing,
888 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100889 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100890#else
891 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
892 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100893 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100894 }
895
Victor Stinner82458b62020-11-01 20:59:35 +0100896#ifdef MS_WINDOWS
897 wchar_t encoding[23];
898 unsigned int ansi_codepage = GetACP();
899 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
900 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
901 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100902#else
903 const char *encoding = nl_langinfo(CODESET);
904 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100905 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
906 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100907 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100908 }
Victor Stinner710e8262020-10-31 01:02:09 +0100909
Victor Stinner82458b62020-11-01 20:59:35 +0100910 wchar_t *wstr;
911 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100912 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100913 if (res < 0) {
914 return NULL;
915 }
916 return wstr;
917#endif // !MS_WINDOWS
918
919#endif // !_Py_FORCE_UTF8_LOCALE
920}
921
922
923PyObject *
924_Py_GetLocaleEncodingObject(void)
925{
Victor Stinnere662c392020-11-01 23:07:23 +0100926 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100927 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100928 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100929 return NULL;
930 }
931
932 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
933 PyMem_RawFree(encoding);
934 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100935}
936
Jakub Kulík9032cf52021-04-30 15:21:42 +0200937#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
938
/* Tell whether the current locale uses a non-Unicode internal wchar_t form.

   Return 1 when nl_langinfo(CODESET) names a codeset other than "UTF-8" and
   "646"; return 0 otherwise, including when the codeset is unavailable. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *name = nl_langinfo(CODESET);
    if (name == NULL) {
        return 0;
    }
    if (strcmp(name, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(name, "646") == 0) {
        return 0;
    }
    return 1;
}
952
953static wchar_t *
954_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
955 const char *tocode, const char *fromcode)
956{
957 Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
958
959 /* Ensure we won't overflow the size. */
960 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
961 PyErr_NoMemory();
962 return NULL;
963 }
964
965 /* the string doesn't have to be NULL terminated */
966 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
967 if (target == NULL) {
968 PyErr_NoMemory();
969 return NULL;
970 }
971
972 iconv_t cd = iconv_open(tocode, fromcode);
973 if (cd == (iconv_t)-1) {
974 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
975 PyMem_Free(target);
976 return NULL;
977 }
978
979 char *inbuf = (char *) source;
980 char *outbuf = (char *) target;
981 size_t inbytesleft = sizeof(wchar_t) * size;
982 size_t outbytesleft = inbytesleft;
983
984 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
985 if (ret == DECODE_ERROR) {
986 PyErr_Format(PyExc_ValueError, "iconv() failed");
987 PyMem_Free(target);
988 iconv_close(cd);
989 return NULL;
990 }
991
992 iconv_close(cd);
993 return target;
994}
995
996/* Convert a wide character string to the UCS-4 encoded string. This
997 is necessary on systems where internal form of wchar_t are not Unicode
998 code points (e.g. Oracle Solaris).
999
1000 Return a pointer to a newly allocated string, use PyMem_Free() to free
1001 the memory. Return NULL and raise exception on conversion or memory
1002 allocation error. */
1003wchar_t *
1004_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1005{
1006 return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1007}
1008
1009/* Convert a UCS-4 encoded string to native wide character string. This
1010 is necessary on systems where internal form of wchar_t are not Unicode
1011 code points (e.g. Oracle Solaris).
1012
1013 The conversion is done in place. This can be done because both wchar_t
1014 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1015 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1016 which is currently the only system using these functions; it doesn't have
1017 to be for other systems).
1018
1019 Return 0 on success. Return -1 and raise exception on conversion
1020 or memory allocation error. */
1021int
1022_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1023{
1024 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1025 if (!result) {
1026 return -1;
1027 }
1028 memcpy(unicode, result, size * sizeof(wchar_t));
1029 PyMem_Free(result);
1030 return 0;
1031}
1032#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
Victor Stinner710e8262020-10-31 01:02:09 +01001033
Steve Dowerf2f373f2015-02-21 08:44:05 -08001034#ifdef MS_WINDOWS
1035static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1036
1037static void
1038FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1039{
1040 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1041 /* Cannot simply cast and dereference in_ptr,
1042 since it might not be aligned properly */
1043 __int64 in;
1044 memcpy(&in, in_ptr, sizeof(in));
1045 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1046 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1047}
1048
1049void
Steve Dowerbf1f3762015-02-21 15:26:02 -08001050_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -08001051{
1052 /* XXX endianness */
1053 __int64 out;
1054 out = time_in + secs_between_epochs;
1055 out = out * 10000000 + nsec_in / 100;
1056 memcpy(out_ptr, &out, sizeof(out));
1057}
1058
1059/* Below, we *know* that ugo+r is 0444 */
1060#if _S_IREAD != 0400
1061#error Unsupported C library
1062#endif
1063static int
1064attributes_to_mode(DWORD attr)
1065{
1066 int m = 0;
1067 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1068 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1069 else
1070 m |= _S_IFREG;
1071 if (attr & FILE_ATTRIBUTE_READONLY)
1072 m |= 0444;
1073 else
1074 m |= 0666;
1075 return m;
1076}
1077
Steve Dowerbf1f3762015-02-21 15:26:02 -08001078void
Victor Stinnere134a7f2015-03-30 10:09:31 +02001079_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1080 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -08001081{
1082 memset(result, 0, sizeof(*result));
1083 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1084 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1085 result->st_dev = info->dwVolumeSerialNumber;
1086 result->st_rdev = result->st_dev;
1087 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1088 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1089 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1090 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +01001091 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -07001092 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1093 open other name surrogate reparse points without traversing them. To
1094 detect/handle these, check st_file_attributes and st_reparse_tag. */
1095 result->st_reparse_tag = reparse_tag;
1096 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1097 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -08001098 /* first clear the S_IFMT bits */
1099 result->st_mode ^= (result->st_mode & S_IFMT);
1100 /* now set the bits that make this a symlink */
1101 result->st_mode |= S_IFLNK;
1102 }
1103 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -08001104}
1105#endif
1106
/* Return information about a file, without raising a Python exception.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Anything that is not a disk file only gets a mode (possibly 0). */
    if (file_type == FILE_TYPE_CHAR) {
        status->st_mode = _S_IFCHR;
        return 0;
    }
    if (file_type == FILE_TYPE_PIPE) {
        status->st_mode = _S_IFIFO;
        return 0;
    }
    if (file_type != FILE_TYPE_DISK) {
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001170
Victor Stinnere134a7f2015-03-30 10:09:31 +02001171/* Return information about a file.
1172
1173 On POSIX, use fstat().
1174
1175 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001176 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1177 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001178 #23152.
1179
1180 Raise an exception and return -1 on error. On Windows, set the last Windows
1181 error on error. On POSIX, set errno on error. Fill status and return 0 on
1182 success.
1183
Victor Stinner6f4fae82015-04-01 18:34:32 +02001184 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1185 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001186int
1187_Py_fstat(int fd, struct _Py_stat_struct *status)
1188{
1189 int res;
1190
Victor Stinner8a1be612016-03-14 22:07:55 +01001191 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001192
Victor Stinnere134a7f2015-03-30 10:09:31 +02001193 Py_BEGIN_ALLOW_THREADS
1194 res = _Py_fstat_noraise(fd, status);
1195 Py_END_ALLOW_THREADS
1196
1197 if (res != 0) {
1198#ifdef MS_WINDOWS
1199 PyErr_SetFromWindowsErr(0);
1200#else
1201 PyErr_SetFromErrno(PyExc_OSError);
1202#endif
1203 return -1;
1204 }
1205 return 0;
1206}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001207
Victor Stinner6672d0c2010-10-07 22:53:43 +00001208/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1209 call stat() otherwise. Only fill st_mode attribute on Windows.
1210
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001211 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1212 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001213
1214int
Victor Stinnera4a75952010-10-07 22:23:10 +00001215_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001216{
1217#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001218 int err;
1219 struct _stat wstatbuf;
1220
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001221#if USE_UNICODE_WCHAR_CACHE
1222 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1223#else /* USE_UNICODE_WCHAR_CACHE */
1224 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1225#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001226 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001227 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001228
Victor Stinneree587ea2011-11-17 00:51:38 +01001229 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001230 if (!err)
1231 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001232#if !USE_UNICODE_WCHAR_CACHE
1233 PyMem_Free(wpath);
1234#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001235 return err;
1236#else
1237 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001238 PyObject *bytes;
1239 char *cpath;
1240
1241 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001242 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001243 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001244
1245 /* check for embedded null bytes */
1246 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1247 Py_DECREF(bytes);
1248 return -2;
1249 }
1250
1251 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001252 Py_DECREF(bytes);
1253 return ret;
1254#endif
1255}
1256
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001257
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001258/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001259static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001260get_inheritable(int fd, int raise)
1261{
1262#ifdef MS_WINDOWS
1263 HANDLE handle;
1264 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001265
Segev Finer5e437fb2021-04-24 01:00:27 +03001266 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001267 if (handle == INVALID_HANDLE_VALUE) {
1268 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001269 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001270 return -1;
1271 }
1272
1273 if (!GetHandleInformation(handle, &flags)) {
1274 if (raise)
1275 PyErr_SetFromWindowsErr(0);
1276 return -1;
1277 }
1278
1279 return (flags & HANDLE_FLAG_INHERIT);
1280#else
1281 int flags;
1282
1283 flags = fcntl(fd, F_GETFD, 0);
1284 if (flags == -1) {
1285 if (raise)
1286 PyErr_SetFromErrno(PyExc_OSError);
1287 return -1;
1288 }
1289 return !(flags & FD_CLOEXEC);
1290#endif
1291}
1292
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;
    return get_inheritable(fd, raise);
}
1301
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001302
1303/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001304static int
1305set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1306{
1307#ifdef MS_WINDOWS
1308 HANDLE handle;
1309 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001310#else
1311#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1312 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001313 int request;
1314 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001315#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001316 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001317 int res;
1318#endif
1319
1320 /* atomic_flag_works can only be used to make the file descriptor
1321 non-inheritable */
1322 assert(!(atomic_flag_works != NULL && inheritable));
1323
1324 if (atomic_flag_works != NULL && !inheritable) {
1325 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001326 int isInheritable = get_inheritable(fd, raise);
1327 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001328 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001329 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001330 }
1331
1332 if (*atomic_flag_works)
1333 return 0;
1334 }
1335
1336#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03001337 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001338 if (handle == INVALID_HANDLE_VALUE) {
1339 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001340 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001341 return -1;
1342 }
1343
1344 if (inheritable)
1345 flags = HANDLE_FLAG_INHERIT;
1346 else
1347 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001348
1349 /* This check can be removed once support for Windows 7 ends. */
1350#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1351 GetFileType(handle) == FILE_TYPE_CHAR)
1352
1353 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1354 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001355 if (raise)
1356 PyErr_SetFromWindowsErr(0);
1357 return -1;
1358 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001359#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001360 return 0;
1361
Victor Stinnerdaf45552013-08-28 00:53:59 +02001362#else
Victor Stinner282124b2014-09-02 11:41:04 +02001363
1364#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001365 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001366 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001367 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1368 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001369 if (inheritable)
1370 request = FIONCLEX;
1371 else
1372 request = FIOCLEX;
1373 err = ioctl(fd, request, NULL);
1374 if (!err) {
1375 ioctl_works = 1;
1376 return 0;
1377 }
1378
Miss Islington (bot)2ae22352021-08-06 06:40:44 -07001379#ifdef O_PATH
cptpcrd7dc71c42021-01-20 09:05:51 -05001380 if (errno == EBADF) {
Miss Islington (bot)2ae22352021-08-06 06:40:44 -07001381 // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1382 // on O_PATH file descriptors. Fall through to the fcntl()
1383 // implementation.
cptpcrd7dc71c42021-01-20 09:05:51 -05001384 }
1385 else
1386#endif
Victor Stinner3116cc42016-05-19 16:46:18 +02001387 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001388 if (raise)
1389 PyErr_SetFromErrno(PyExc_OSError);
1390 return -1;
1391 }
1392 else {
1393 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1394 device". The ioctl is declared but not supported by the kernel.
1395 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001396 Illumos-based OS for example.
1397
1398 Issue #27057: When SELinux policy disallows ioctl it will fail
1399 with EACCES. While FIOCLEX is safe operation it may be
1400 unavailable because ioctl was denied altogether.
1401 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001402 ioctl_works = 0;
1403 }
1404 /* fallback to fcntl() if ioctl() does not work */
1405 }
1406#endif
1407
1408 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001409 flags = fcntl(fd, F_GETFD);
1410 if (flags < 0) {
1411 if (raise)
1412 PyErr_SetFromErrno(PyExc_OSError);
1413 return -1;
1414 }
1415
Victor Stinnera858bbd2016-04-17 16:51:52 +02001416 if (inheritable) {
1417 new_flags = flags & ~FD_CLOEXEC;
1418 }
1419 else {
1420 new_flags = flags | FD_CLOEXEC;
1421 }
1422
1423 if (new_flags == flags) {
1424 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1425 return 0;
1426 }
1427
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001428 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001429 if (res < 0) {
1430 if (raise)
1431 PyErr_SetFromErrno(PyExc_OSError);
1432 return -1;
1433 }
1434 return 0;
1435#endif
1436}
1437
/* Make the file descriptor non-inheritable, without raising an exception
   and without any atomic-flag bookkeeping.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    return set_inheritable(fd,
                           0 /* inheritable */,
                           0 /* raise */,
                           NULL /* atomic_flag_works */);
}
1445
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1: query the inheritable flag of the file
      descriptor.  If the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set it to 0 and
      make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1467
/* Variant of _Py_set_inheritable() which never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1476
Victor Stinnera555cfc2015-03-18 00:22:14 +01001477static int
1478_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001479{
1480 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001481 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001482#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001483 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001484#endif
1485
1486#ifdef MS_WINDOWS
1487 flags |= O_NOINHERIT;
1488#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001489 atomic_flag_works = &_Py_open_cloexec_works;
1490 flags |= O_CLOEXEC;
1491#else
1492 atomic_flag_works = NULL;
1493#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001494
Victor Stinnera555cfc2015-03-18 00:22:14 +01001495 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001496 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1497 if (pathname_obj == NULL) {
1498 return -1;
1499 }
1500 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1501 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001502 return -1;
1503 }
1504
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001505 do {
1506 Py_BEGIN_ALLOW_THREADS
1507 fd = open(pathname, flags);
1508 Py_END_ALLOW_THREADS
1509 } while (fd < 0
1510 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001511 if (async_err) {
1512 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001513 return -1;
1514 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001515 if (fd < 0) {
1516 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1517 Py_DECREF(pathname_obj);
1518 return -1;
1519 }
1520 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001521 }
1522 else {
1523 fd = open(pathname, flags);
1524 if (fd < 0)
1525 return -1;
1526 }
1527
1528#ifndef MS_WINDOWS
1529 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001530 close(fd);
1531 return -1;
1532 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001533#endif
1534
Victor Stinnerdaf45552013-08-28 00:53:59 +02001535 return fd;
1536}
1537
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1555
/* Wrapper around open(): open 'pathname' with the given 'flags', without
   raising any Python exception.

   Return the new file descriptor on success. On error, set errno and
   return -1.

   The returned file descriptor is non-inheritable.

   The call is not retried on signal: it fails with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1567
Victor Stinnerdaf45552013-08-28 00:53:59 +02001568/* Open a file. Use _wfopen() on Windows, encode the path to the locale
Victor Stinnere42ccd22015-03-18 01:39:23 +01001569 encoding and use fopen() otherwise.
1570
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001571 The file descriptor is created non-inheritable.
1572
1573 If interrupted by a signal, fail with EINTR. */
Victor Stinner4e314432010-10-07 21:45:39 +00001574FILE *
1575_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1576{
Victor Stinner4e314432010-10-07 21:45:39 +00001577 FILE *f;
Steve Dowerb82e17e2019-05-23 08:45:22 -07001578 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1579 return NULL;
1580 }
Victor Stinnerdaf45552013-08-28 00:53:59 +02001581#ifndef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001582 char *cpath;
1583 char cmode[10];
1584 size_t r;
1585 r = wcstombs(cmode, mode, 10);
Victor Stinner99768342021-03-17 21:46:53 +01001586 if (r == DECODE_ERROR || r >= 10) {
Victor Stinner4e314432010-10-07 21:45:39 +00001587 errno = EINVAL;
1588 return NULL;
1589 }
Victor Stinner9dd76202017-12-21 16:20:32 +01001590 cpath = _Py_EncodeLocaleRaw(path, NULL);
1591 if (cpath == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001592 return NULL;
Victor Stinner9dd76202017-12-21 16:20:32 +01001593 }
Victor Stinner4e314432010-10-07 21:45:39 +00001594 f = fopen(cpath, cmode);
Victor Stinner9dd76202017-12-21 16:20:32 +01001595 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001596#else
Victor Stinnerdaf45552013-08-28 00:53:59 +02001597 f = _wfopen(path, mode);
Victor Stinner4e314432010-10-07 21:45:39 +00001598#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001599 if (f == NULL)
1600 return NULL;
1601 if (make_non_inheritable(fileno(f)) < 0) {
1602 fclose(f);
1603 return NULL;
1604 }
1605 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001606}
1607
Victor Stinnerdaf45552013-08-28 00:53:59 +02001608
1609/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001610 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001611
Victor Stinnere42ccd22015-03-18 01:39:23 +01001612 Return the new file object on success. Raise an exception and return NULL
1613 on error.
1614
1615 The file descriptor is created non-inheritable.
1616
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001617 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1618 except if the Python signal handler raises an exception.
1619
Victor Stinner6f4fae82015-04-01 18:34:32 +02001620 Release the GIL to call _wfopen() or fopen(). The caller must hold
1621 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001622FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001623_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001624{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001625 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001626 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001627#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001628 wchar_t wmode[10];
1629 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001630
Victor Stinnere42ccd22015-03-18 01:39:23 +01001631 assert(PyGILState_Check());
1632
Steve Dowerb82e17e2019-05-23 08:45:22 -07001633 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1634 return NULL;
1635 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001636 if (!PyUnicode_Check(path)) {
1637 PyErr_Format(PyExc_TypeError,
1638 "str file path expected under Windows, got %R",
1639 Py_TYPE(path));
1640 return NULL;
1641 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001642#if USE_UNICODE_WCHAR_CACHE
1643 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1644#else /* USE_UNICODE_WCHAR_CACHE */
1645 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1646#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001647 if (wpath == NULL)
1648 return NULL;
1649
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001650 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1651 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001652 if (usize == 0) {
1653 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001654#if !USE_UNICODE_WCHAR_CACHE
1655 PyMem_Free(wpath);
1656#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001657 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001658 }
Victor Stinner4e314432010-10-07 21:45:39 +00001659
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001660 do {
1661 Py_BEGIN_ALLOW_THREADS
1662 f = _wfopen(wpath, wmode);
1663 Py_END_ALLOW_THREADS
1664 } while (f == NULL
1665 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001666#if !USE_UNICODE_WCHAR_CACHE
1667 PyMem_Free(wpath);
1668#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001669#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001670 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001671 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001672
1673 assert(PyGILState_Check());
1674
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001675 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001676 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001677 path_bytes = PyBytes_AS_STRING(bytes);
1678
Steve Dowerb82e17e2019-05-23 08:45:22 -07001679 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001680 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001681 return NULL;
1682 }
1683
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001684 do {
1685 Py_BEGIN_ALLOW_THREADS
1686 f = fopen(path_bytes, mode);
1687 Py_END_ALLOW_THREADS
1688 } while (f == NULL
1689 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001690
Victor Stinner4e314432010-10-07 21:45:39 +00001691 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001692#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001693 if (async_err)
1694 return NULL;
1695
Victor Stinnere42ccd22015-03-18 01:39:23 +01001696 if (f == NULL) {
1697 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001698 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001699 }
1700
1701 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001702 fclose(f);
1703 return NULL;
1704 }
1705 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001706}
1707
Victor Stinner66aab0c2015-03-19 22:53:20 +01001708/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001709
1710 On success, return the number of read bytes, it can be lower than count.
1711 If the current file offset is at or past the end of file, no bytes are read,
1712 and read() returns zero.
1713
1714 On error, raise an exception, set errno and return -1.
1715
1716 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1717 If the Python signal handler raises an exception, the function returns -1
1718 (the syscall is not retried).
1719
1720 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001721Py_ssize_t
1722_Py_read(int fd, void *buf, size_t count)
1723{
1724 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001725 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001726 int async_err = 0;
1727
Victor Stinner8a1be612016-03-14 22:07:55 +01001728 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001729
Victor Stinner66aab0c2015-03-19 22:53:20 +01001730 /* _Py_read() must not be called with an exception set, otherwise the
1731 * caller may think that read() was interrupted by a signal and the signal
1732 * handler raised an exception. */
1733 assert(!PyErr_Occurred());
1734
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001735 if (count > _PY_READ_MAX) {
1736 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001737 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001738
Steve Dower8fc89802015-04-12 00:26:27 -04001739 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001740 do {
1741 Py_BEGIN_ALLOW_THREADS
1742 errno = 0;
1743#ifdef MS_WINDOWS
1744 n = read(fd, buf, (int)count);
1745#else
1746 n = read(fd, buf, count);
1747#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001748 /* save/restore errno because PyErr_CheckSignals()
1749 * and PyErr_SetFromErrno() can modify it */
1750 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001751 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001752 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001753 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001754 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001755
1756 if (async_err) {
1757 /* read() was interrupted by a signal (failed with EINTR)
1758 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001759 errno = err;
1760 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001761 return -1;
1762 }
1763 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001764 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001765 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001766 return -1;
1767 }
1768
1769 return n;
1770}
1771
Victor Stinner82c3e452015-04-01 18:34:45 +02001772static Py_ssize_t
1773_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001774{
1775 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001776 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001777 int async_err = 0;
1778
Steve Dower8fc89802015-04-12 00:26:27 -04001779 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001780#ifdef MS_WINDOWS
Miss Islington (bot)23c46772021-09-09 09:35:43 -07001781 if (count > 32767) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001782 /* Issue #11395: the Windows console returns an error (12: not
1783 enough space error) on writing into stdout if stdout mode is
1784 binary and the length is greater than 66,000 bytes (or less,
1785 depending on heap usage). */
Miss Islington (bot)23c46772021-09-09 09:35:43 -07001786 if (gil_held) {
1787 Py_BEGIN_ALLOW_THREADS
1788 if (isatty(fd)) {
1789 count = 32767;
1790 }
1791 Py_END_ALLOW_THREADS
1792 } else {
1793 if (isatty(fd)) {
1794 count = 32767;
1795 }
1796 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001797 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001798#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001799 if (count > _PY_WRITE_MAX) {
1800 count = _PY_WRITE_MAX;
1801 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001802
Victor Stinner82c3e452015-04-01 18:34:45 +02001803 if (gil_held) {
1804 do {
1805 Py_BEGIN_ALLOW_THREADS
1806 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001807#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001808 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001809#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001810 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001811#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001812 /* save/restore errno because PyErr_CheckSignals()
1813 * and PyErr_SetFromErrno() can modify it */
1814 err = errno;
1815 Py_END_ALLOW_THREADS
1816 } while (n < 0 && err == EINTR &&
1817 !(async_err = PyErr_CheckSignals()));
1818 }
1819 else {
1820 do {
1821 errno = 0;
1822#ifdef MS_WINDOWS
1823 n = write(fd, buf, (int)count);
1824#else
1825 n = write(fd, buf, count);
1826#endif
1827 err = errno;
1828 } while (n < 0 && err == EINTR);
1829 }
Steve Dower8fc89802015-04-12 00:26:27 -04001830 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001831
1832 if (async_err) {
1833 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001834 and the Python signal handler raised an exception (if gil_held is
1835 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001836 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001837 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001838 return -1;
1839 }
1840 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001841 if (gil_held)
1842 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001843 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001844 return -1;
1845 }
1846
1847 return n;
1848}
1849
Victor Stinner82c3e452015-04-01 18:34:45 +02001850/* Write count bytes of buf into fd.
1851
1852 On success, return the number of written bytes, it can be lower than count
1853 including 0. On error, raise an exception, set errno and return -1.
1854
1855 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1856 If the Python signal handler raises an exception, the function returns -1
1857 (the syscall is not retried).
1858
1859 Release the GIL to call write(). The caller must hold the GIL. */
1860Py_ssize_t
1861_Py_write(int fd, const void *buf, size_t count)
1862{
Victor Stinner8a1be612016-03-14 22:07:55 +01001863 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001864
Victor Stinner82c3e452015-04-01 18:34:45 +02001865 /* _Py_write() must not be called with an exception set, otherwise the
1866 * caller may think that write() was interrupted by a signal and the signal
1867 * handler raised an exception. */
1868 assert(!PyErr_Occurred());
1869
1870 return _Py_write_impl(fd, buf, count, 1);
1871}
1872
1873/* Write count bytes of buf into fd.
1874 *
1875 * On success, return the number of written bytes, it can be lower than count
1876 * including 0. On error, set errno and return -1.
1877 *
1878 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1879 * without calling the Python signal handler. */
1880Py_ssize_t
1881_Py_write_noraise(int fd, const void *buf, size_t count)
1882{
1883 return _Py_write_impl(fd, buf, count, 0);
1884}
1885
#ifdef HAVE_READLINK

/* Read the target of the symbolic link 'path' into 'buf', which can hold
   'buflen' wide characters. The path is encoded to the locale encoding and
   the result is decoded from the locale encoding.

   Return the length of the target in wide characters on success.

   Return -1 on encoding error, on readlink() error, if the internal byte
   buffer is too short, on decoding error, or if 'buf' is too short. errno is
   set to EINVAL in all these cases except a readlink() failure. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
{
    char target[MAXPATHLEN];
    const size_t target_size = Py_ARRAY_LENGTH(target);

    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    Py_ssize_t nbytes = readlink(encoded_path, target, target_size);
    PyMem_RawFree(encoded_path);
    if (nbytes == -1) {
        return -1;
    }
    /* readlink() does not add a NUL byte: a completely filled buffer leaves
       no room for one. */
    if ((size_t)nbytes == target_size) {
        errno = EINVAL;
        return -1;
    }
    target[nbytes] = '\0';

    size_t wlen;
    wchar_t *decoded = Py_DecodeLocale(target, &wlen);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* 'buf' must have room for the trailing NUL character as well. */
    const int fits = (wlen < buflen);
    if (fits) {
        wcsncpy(buf, decoded, buflen);
    }
    PyMem_RawFree(decoded);
    if (!fits) {
        errno = EINVAL;
        return -1;
    }
    return (int)wlen;
}
#endif
1934
#ifdef HAVE_REALPATH

/* Compute the canonicalized absolute pathname of 'path' with realpath() and
   store it into 'resolved_path', which can hold 'resolved_path_len' wide
   characters. The path is encoded to the locale encoding and the result is
   decoded from the locale encoding.

   Return 'resolved_path' on success.

   Return NULL on encoding error, realpath() error, decoding error or if
   'resolved_path' is too short. errno is set to EINVAL in all these cases
   except a realpath() failure. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char resolved_bytes[MAXPATHLEN];

    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return NULL;
    }

    char *resolved = realpath(encoded_path, resolved_bytes);
    PyMem_RawFree(encoded_path);
    if (resolved == NULL) {
        return NULL;
    }

    size_t wlen;
    wchar_t *decoded = Py_DecodeLocale(resolved_bytes, &wlen);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* 'resolved_path' must have room for the trailing NUL character as
       well. */
    const int fits = (wlen < resolved_path_len);
    if (fits) {
        wcsncpy(resolved_path, decoded, resolved_path_len);
    }
    PyMem_RawFree(decoded);
    if (!fits) {
        errno = EINVAL;
        return NULL;
    }
    return resolved_path;
}
#endif
1977
Victor Stinner3939c322019-06-25 15:02:43 +02001978
1979#ifndef MS_WINDOWS
1980int
1981_Py_isabs(const wchar_t *path)
1982{
1983 return (path[0] == SEP);
1984}
1985#endif
1986
1987
1988/* Get an absolute path.
1989 On error (ex: fail to get the current directory), return -1.
1990 On memory allocation failure, set *abspath_p to NULL and return 0.
1991 On success, return a newly allocated to *abspath_p to and return 0.
1992 The string must be freed by PyMem_RawFree(). */
1993int
1994_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1995{
1996#ifdef MS_WINDOWS
1997 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1998 DWORD result;
1999
2000 result = GetFullPathNameW(path,
2001 Py_ARRAY_LENGTH(woutbuf), woutbuf,
2002 NULL);
2003 if (!result) {
2004 return -1;
2005 }
2006
2007 if (result > Py_ARRAY_LENGTH(woutbuf)) {
2008 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2009 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
2010 }
2011 else {
2012 woutbufp = NULL;
2013 }
2014 if (!woutbufp) {
2015 *abspath_p = NULL;
2016 return 0;
2017 }
2018
2019 result = GetFullPathNameW(path, result, woutbufp, NULL);
2020 if (!result) {
2021 PyMem_RawFree(woutbufp);
2022 return -1;
2023 }
2024 }
2025
2026 if (woutbufp != woutbuf) {
2027 *abspath_p = woutbufp;
2028 return 0;
2029 }
2030
2031 *abspath_p = _PyMem_RawWcsdup(woutbufp);
2032 return 0;
2033#else
2034 if (_Py_isabs(path)) {
2035 *abspath_p = _PyMem_RawWcsdup(path);
2036 return 0;
2037 }
2038
2039 wchar_t cwd[MAXPATHLEN + 1];
2040 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2041 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2042 /* unable to get the current directory */
2043 return -1;
2044 }
2045
2046 size_t cwd_len = wcslen(cwd);
2047 size_t path_len = wcslen(path);
2048 size_t len = cwd_len + 1 + path_len + 1;
2049 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2050 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2051 }
2052 else {
2053 *abspath_p = NULL;
2054 }
2055 if (*abspath_p == NULL) {
2056 return 0;
2057 }
2058
2059 wchar_t *abspath = *abspath_p;
2060 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2061 abspath += cwd_len;
2062
2063 *abspath = (wchar_t)SEP;
2064 abspath++;
2065
2066 memcpy(abspath, path, path_len * sizeof(wchar_t));
2067 abspath += path_len;
2068
2069 *abspath = 0;
2070 return 0;
2071#endif
2072}
2073
2074
Victor Stinnerfaddaed2019-03-19 02:58:14 +01002075/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01002076 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00002077
Victor Stinner1be0d112019-03-18 17:47:26 +01002078 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2079 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00002080wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01002081_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00002082{
2083#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01002084 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2085 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00002086#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01002087 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00002088 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00002089 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00002090
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01002091 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00002092 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02002093 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00002094 if (wname == NULL)
2095 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01002096 /* wname must have space to store the trailing NUL character */
2097 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02002098 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00002099 return NULL;
2100 }
Victor Stinner1be0d112019-03-18 17:47:26 +01002101 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02002102 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00002103 return buf;
2104#endif
2105}
2106
Victor Stinnerdaf45552013-08-28 00:53:59 +02002107/* Duplicate a file descriptor. The new file descriptor is created as
2108 non-inheritable. Return a new file descriptor on success, raise an OSError
2109 exception and return -1 on error.
2110
2111 The GIL is released to call dup(). The caller must hold the GIL. */
2112int
2113_Py_dup(int fd)
2114{
2115#ifdef MS_WINDOWS
2116 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002117#endif
2118
Victor Stinner8a1be612016-03-14 22:07:55 +01002119 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01002120
Victor Stinnerdaf45552013-08-28 00:53:59 +02002121#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03002122 handle = _Py_get_osfhandle(fd);
2123 if (handle == INVALID_HANDLE_VALUE)
Victor Stinnerdaf45552013-08-28 00:53:59 +02002124 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002125
Victor Stinnerdaf45552013-08-28 00:53:59 +02002126 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002127 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002128 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002129 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002130 Py_END_ALLOW_THREADS
2131 if (fd < 0) {
2132 PyErr_SetFromErrno(PyExc_OSError);
2133 return -1;
2134 }
2135
Zackery Spytz28fca0c2019-06-17 01:17:14 -06002136 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2137 _Py_BEGIN_SUPPRESS_IPH
2138 close(fd);
2139 _Py_END_SUPPRESS_IPH
2140 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002141 }
2142#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2143 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002144 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002145 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002146 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002147 Py_END_ALLOW_THREADS
2148 if (fd < 0) {
2149 PyErr_SetFromErrno(PyExc_OSError);
2150 return -1;
2151 }
2152
2153#else
2154 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002155 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002156 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002157 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002158 Py_END_ALLOW_THREADS
2159 if (fd < 0) {
2160 PyErr_SetFromErrno(PyExc_OSError);
2161 return -1;
2162 }
2163
2164 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002165 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002166 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002167 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002168 return -1;
2169 }
2170#endif
2171 return fd;
2172}
2173
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002174#ifndef MS_WINDOWS
2175/* Get the blocking mode of the file descriptor.
2176 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2177 raise an exception and return -1 on error. */
2178int
2179_Py_get_blocking(int fd)
2180{
Steve Dower8fc89802015-04-12 00:26:27 -04002181 int flags;
2182 _Py_BEGIN_SUPPRESS_IPH
2183 flags = fcntl(fd, F_GETFL, 0);
2184 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002185 if (flags < 0) {
2186 PyErr_SetFromErrno(PyExc_OSError);
2187 return -1;
2188 }
2189
2190 return !(flags & O_NONBLOCK);
2191}
2192
2193/* Set the blocking mode of the specified file descriptor.
2194
2195 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2196 otherwise.
2197
2198 Return 0 on success, raise an exception and return -1 on error. */
2199int
2200_Py_set_blocking(int fd, int blocking)
2201{
pxinwr06afac62020-12-08 04:41:12 +08002202/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2203 Use fcntl() instead. */
2204#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002205 int arg = !blocking;
2206 if (ioctl(fd, FIONBIO, &arg) < 0)
2207 goto error;
2208#else
2209 int flags, res;
2210
Steve Dower8fc89802015-04-12 00:26:27 -04002211 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002212 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002213 if (flags >= 0) {
2214 if (blocking)
2215 flags = flags & (~O_NONBLOCK);
2216 else
2217 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002218
Steve Dower8fc89802015-04-12 00:26:27 -04002219 res = fcntl(fd, F_SETFL, flags);
2220 } else {
2221 res = -1;
2222 }
2223 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002224
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002225 if (res < 0)
2226 goto error;
2227#endif
2228 return 0;
2229
2230error:
2231 PyErr_SetFromErrno(PyExc_OSError);
2232 return -1;
2233}
Segev Finer5e437fb2021-04-24 01:00:27 +03002234#else /* MS_WINDOWS */
2235void*
2236_Py_get_osfhandle_noraise(int fd)
2237{
2238 void *handle;
2239 _Py_BEGIN_SUPPRESS_IPH
2240 handle = (void*)_get_osfhandle(fd);
2241 _Py_END_SUPPRESS_IPH
2242 return handle;
2243}
Victor Stinnercb064fc2018-01-15 15:58:02 +01002244
Segev Finer5e437fb2021-04-24 01:00:27 +03002245void*
2246_Py_get_osfhandle(int fd)
2247{
2248 void *handle = _Py_get_osfhandle_noraise(fd);
2249 if (handle == INVALID_HANDLE_VALUE)
2250 PyErr_SetFromErrno(PyExc_OSError);
2251
2252 return handle;
2253}
2254
2255int
2256_Py_open_osfhandle_noraise(void *handle, int flags)
2257{
2258 int fd;
2259 _Py_BEGIN_SUPPRESS_IPH
2260 fd = _open_osfhandle((intptr_t)handle, flags);
2261 _Py_END_SUPPRESS_IPH
2262 return fd;
2263}
2264
2265int
2266_Py_open_osfhandle(void *handle, int flags)
2267{
2268 int fd = _Py_open_osfhandle_noraise(handle, flags);
2269 if (fd == -1)
2270 PyErr_SetFromErrno(PyExc_OSError);
2271
2272 return fd;
2273}
2274#endif /* MS_WINDOWS */
Victor Stinnercb064fc2018-01-15 15:58:02 +01002275
2276int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002277_Py_GetLocaleconvNumeric(struct lconv *lc,
2278 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002279{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002280 assert(decimal_point != NULL);
2281 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002282
TIGirardif2312032020-10-20 08:39:52 -03002283#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002284 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002285 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002286 change_locale = 1;
2287 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002288 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002289 change_locale = 1;
2290 }
2291
2292 /* Keep a copy of the LC_CTYPE locale */
2293 char *oldloc = NULL, *loc = NULL;
2294 if (change_locale) {
2295 oldloc = setlocale(LC_CTYPE, NULL);
2296 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002297 PyErr_SetString(PyExc_RuntimeWarning,
2298 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002299 return -1;
2300 }
2301
2302 oldloc = _PyMem_Strdup(oldloc);
2303 if (!oldloc) {
2304 PyErr_NoMemory();
2305 return -1;
2306 }
2307
2308 loc = setlocale(LC_NUMERIC, NULL);
2309 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2310 loc = NULL;
2311 }
2312
2313 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002314 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002315 if LC_NUMERIC locale is different than LC_CTYPE locale and
2316 decimal_point and/or thousands_sep are non-ASCII or longer than
2317 1 byte */
2318 setlocale(LC_CTYPE, loc);
2319 }
2320 }
2321
TIGirardif2312032020-10-20 08:39:52 -03002322#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2323#else /* MS_WINDOWS */
2324/* Use _W_* fields of Windows strcut lconv */
2325#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2326#endif /* MS_WINDOWS */
2327
Victor Stinner02e6bf72018-11-20 16:20:16 +01002328 int res = -1;
2329
TIGirardif2312032020-10-20 08:39:52 -03002330 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002331 if (*decimal_point == NULL) {
2332 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002333 }
2334
TIGirardif2312032020-10-20 08:39:52 -03002335 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002336 if (*thousands_sep == NULL) {
2337 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002338 }
2339
2340 res = 0;
2341
Victor Stinner02e6bf72018-11-20 16:20:16 +01002342done:
TIGirardif2312032020-10-20 08:39:52 -03002343#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002344 if (loc != NULL) {
2345 setlocale(LC_CTYPE, oldloc);
2346 }
2347 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002348#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002349 return res;
TIGirardif2312032020-10-20 08:39:52 -03002350
2351#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002352}
Kyle Evans79925792020-10-13 15:04:44 -05002353
/* Our selection logic for which function to use is as follows:
 * 1. If close_range(2) is available, always prefer that; it's better for
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
 *    the entire fd space and simply doing nothing for those outside the range.
 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
 *    closing up to sysconf(_SC_OPEN_MAX).
 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
 *    as that will be more performant if the range happens to have any chunk of
 *    non-opened fd in the middle.
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
 */
/* USE_CLOSEFROM enables the closefrom() branch of _Py_closerange().
   It is only defined when building on FreeBSD. */
#ifdef __FreeBSD__
#  define USE_CLOSEFROM
#endif /* __FreeBSD__ */

/* USE_FDWALK enables the fdwalk() branch of _Py_closerange() and its
   callback, _fdwalk_close_func().  HAVE_FDWALK is presumably set by the
   build configuration when fdwalk() is available. */
#ifdef HAVE_FDWALK
#  define USE_FDWALK
#endif /* HAVE_FDWALK */
2372
2373#ifdef USE_FDWALK
/* Callback passed to fdwalk() by _Py_closerange().

   'lohi' points to two ints: an inclusive lower bound followed by an
   exclusive upper bound.  A descriptor inside [lo, hi) is closed, ignoring
   any error.  Return 1 once 'fd' has reached the upper bound (presumably
   asking fdwalk() to stop iterating), and 0 otherwise. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int range_start = bounds[0];
    const int range_stop = bounds[1];

    if (fd >= range_stop) {
        return 1;
    }
    if (fd >= range_start) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2389#endif /* USE_FDWALK */
2390
2391/* Closes all file descriptors in [first, last], ignoring errors. */
2392void
2393_Py_closerange(int first, int last)
2394{
2395 first = Py_MAX(first, 0);
2396 _Py_BEGIN_SUPPRESS_IPH
2397#ifdef HAVE_CLOSE_RANGE
2398 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2399 /* Any errors encountered while closing file descriptors are ignored;
2400 * ENOSYS means no kernel support, though,
2401 * so we'll fallback to the other methods. */
2402 }
2403 else
2404#endif /* HAVE_CLOSE_RANGE */
2405#ifdef USE_CLOSEFROM
2406 if (last >= sysconf(_SC_OPEN_MAX)) {
2407 /* Any errors encountered while closing file descriptors are ignored */
2408 closefrom(first);
2409 }
2410 else
2411#endif /* USE_CLOSEFROM */
2412#ifdef USE_FDWALK
2413 {
2414 int lohi[2];
2415 lohi[0] = first;
2416 lohi[1] = last + 1;
2417 fdwalk(_fdwalk_close_func, lohi);
2418 }
2419#else
2420 {
2421 for (int i = first; i <= last; i++) {
2422 /* Ignore errors */
2423 (void)close(i);
2424 }
2425 }
2426#endif /* USE_FDWALK */
2427 _Py_END_SUPPRESS_IPH
2428}