blob: a8fab00629da41c79a99701162b7ce10c6f366e0 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
Jakub Kulík9032cf52021-04-30 15:21:42 +020021#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
22#include <iconv.h>
23#endif
24
Victor Stinnerdaf45552013-08-28 00:53:59 +020025#ifdef HAVE_FCNTL_H
26#include <fcntl.h>
27#endif /* HAVE_FCNTL_H */
28
Victor Stinnerdaf45552013-08-28 00:53:59 +020029#ifdef O_CLOEXEC
Victor Stinnerb034eee2013-09-07 10:36:04 +020030/* Does open() support the O_CLOEXEC flag? Possible values:
Victor Stinnerdaf45552013-08-28 00:53:59 +020031
32 -1: unknown
33 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
34 1: open() supports O_CLOEXEC flag, close-on-exec is set
35
Victor Stinnera555cfc2015-03-18 00:22:14 +010036 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
37 and os.open(). */
Victor Stinnerdaf45552013-08-28 00:53:59 +020038int _Py_open_cloexec_works = -1;
39#endif
40
Victor Stinner99768342021-03-17 21:46:53 +010041// The value must be the same in unicodeobject.c.
42#define MAX_UNICODE 0x10ffff
43
44// mbstowcs() and mbrtowc() errors
45static const size_t DECODE_ERROR = ((size_t)-1);
46static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
47
Victor Stinner3d4226a2018-08-29 22:21:32 +020048
49static int
50get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
51{
52 switch (errors)
53 {
54 case _Py_ERROR_STRICT:
55 *surrogateescape = 0;
56 return 0;
57 case _Py_ERROR_SURROGATEESCAPE:
58 *surrogateescape = 1;
59 return 0;
60 default:
61 return -1;
62 }
63}
64
65
Brett Cannonefb00c02012-02-29 18:31:31 -050066PyObject *
67_Py_device_encoding(int fd)
68{
Steve Dower8fc89802015-04-12 00:26:27 -040069 int valid;
70 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070071 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040072 _Py_END_SUPPRESS_IPH
73 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050074 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040075
Victor Stinner14b9b112013-06-25 00:37:25 +020076#if defined(MS_WINDOWS)
Victor Stinner35297182020-11-04 11:20:10 +010077 UINT cp;
Brett Cannonefb00c02012-02-29 18:31:31 -050078 if (fd == 0)
79 cp = GetConsoleCP();
80 else if (fd == 1 || fd == 2)
81 cp = GetConsoleOutputCP();
82 else
83 cp = 0;
84 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
85 has no console */
Victor Stinner35297182020-11-04 11:20:10 +010086 if (cp == 0) {
87 Py_RETURN_NONE;
Brett Cannonefb00c02012-02-29 18:31:31 -050088 }
Victor Stinner35297182020-11-04 11:20:10 +010089
90 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
91#else
92 return _Py_GetLocaleEncodingObject();
Brett Cannonefb00c02012-02-29 18:31:31 -050093#endif
Brett Cannonefb00c02012-02-29 18:31:31 -050094}
95
Victor Stinner99768342021-03-17 21:46:53 +010096
97static size_t
98is_valid_wide_char(wchar_t ch)
99{
Jakub Kulík9032cf52021-04-30 15:21:42 +0200100#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
101 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
102 for non-Unicode locales, which makes values higher than MAX_UNICODE
103 possibly valid. */
104 return 1;
105#endif
Victor Stinner99768342021-03-17 21:46:53 +0100106 if (Py_UNICODE_IS_SURROGATE(ch)) {
107 // Reject lone surrogate characters
108 return 0;
109 }
110 if (ch > MAX_UNICODE) {
111 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
112 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
113 // it creates characters outside the [U+0000; U+10ffff] range:
114 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
115 return 0;
116 }
117 return 1;
118}
119
120
121static size_t
122_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
123{
124 size_t count = mbstowcs(dest, src, n);
125 if (dest != NULL && count != DECODE_ERROR) {
126 for (size_t i=0; i < count; i++) {
127 wchar_t ch = dest[i];
128 if (!is_valid_wide_char(ch)) {
129 return DECODE_ERROR;
130 }
131 }
132 }
133 return count;
134}
135
136
137#ifdef HAVE_MBRTOWC
138static size_t
139_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
140{
141 assert(pwc != NULL);
142 size_t count = mbrtowc(pwc, str, len, pmbs);
143 if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
144 if (!is_valid_wide_char(*pwc)) {
145 return DECODE_ERROR;
146 }
147 }
148 return count;
149}
150#endif
151
152
Victor Stinnere2510952019-05-02 11:28:57 -0400153#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100154
155#define USE_FORCE_ASCII
156
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100157extern int _Py_normalize_encoding(const char *, char *, size_t);
158
Victor Stinnerd500e532018-08-28 17:27:36 +0200159/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
160 and POSIX locale. nl_langinfo(CODESET) announces an alias of the
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100161 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
162 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
163 locale.getpreferredencoding() codec. For example, if command line arguments
164 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
165 UnicodeEncodeError instead of retrieving the original byte string.
166
167 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
168 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
169 one byte in range 0x80-0xff can be decoded from the locale encoding. The
170 workaround is also enabled on error, for example if getting the locale
171 failed.
172
Victor Stinnerd500e532018-08-28 17:27:36 +0200173 On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
174 announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
175 ASCII encoding in this case.
176
Philip Jenvey215c49a2013-01-15 13:24:12 -0800177 Values of force_ascii:
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100178
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200179 1: the workaround is used: Py_EncodeLocale() uses
180 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100181 decode_ascii()
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200182 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
183 Py_DecodeLocale() uses mbstowcs()
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100184 -1: unknown, need to call check_force_ascii() to get the value
185*/
186static int force_ascii = -1;
187
/* Compute the value of the force_ascii flag.

   Return 1 if the ASCII workaround must be used, 0 otherwise:

   - return 0 if the LC_CTYPE locale is neither "C" nor "POSIX";
   - return 1 if the locale or the codeset cannot be retrieved or
     normalized, or if nl_langinfo(CODESET) is not available;
   - on HP-UX, return 1 if the codeset is "roman8" but mbstowcs() decodes
     byte 0xA7 as U+00A7 (Latin1 behavior);
   - on other platforms, return 1 if the codeset is an alias of ASCII but
     at least one byte in range 0x80-0xff can be decoded by mbstowcs().

   The probe buffers passed to _Py_mbstowcs() are NUL-terminated: mbstowcs()
   expects a null-terminated multibyte string, and without the terminator a
   lead byte of a multibyte sequence would let the C library read past the
   end of the buffer. */
static int
check_force_ascii(void)
{
    const char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* single byte 0xA7 followed by the NUL terminator */
        const unsigned char ch[2] = {0xA7, '\0'};
        wchar_t wch;
        size_t res;

        res = _Py_mbstowcs(&wch, (const char *)ch, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* one non-ASCII byte followed by the NUL terminator */
        const unsigned char ch[2] = {(unsigned char)i, '\0'};
        wchar_t wch[1];
        size_t res;

        res = _Py_mbstowcs(wch, (const char *)ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
287
Victor Stinnerd500e532018-08-28 17:27:36 +0200288
289int
290_Py_GetForceASCII(void)
291{
292 if (force_ascii == -1) {
293 force_ascii = check_force_ascii();
294 }
295 return force_ascii;
296}
297
298
Victor Stinner353933e2018-11-23 13:08:26 +0100299void
300_Py_ResetForceASCII(void)
301{
302 force_ascii = -1;
303}
304
305
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100306static int
307encode_ascii(const wchar_t *text, char **str,
308 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200309 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100310{
311 char *result = NULL, *out;
312 size_t len, i;
313 wchar_t ch;
314
Victor Stinner3d4226a2018-08-29 22:21:32 +0200315 int surrogateescape;
316 if (get_surrogateescape(errors, &surrogateescape) < 0) {
317 return -3;
318 }
319
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100320 len = wcslen(text);
321
Victor Stinner9bee3292017-12-21 16:49:13 +0100322 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100323 if (raw_malloc) {
324 result = PyMem_RawMalloc(len + 1);
325 }
326 else {
327 result = PyMem_Malloc(len + 1);
328 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100329 if (result == NULL) {
330 return -1;
331 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100332
333 out = result;
334 for (i=0; i<len; i++) {
335 ch = text[i];
336
337 if (ch <= 0x7f) {
338 /* ASCII character */
339 *out++ = (char)ch;
340 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100341 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100342 /* UTF-8b surrogate */
343 *out++ = (char)(ch - 0xdc00);
344 }
345 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100346 if (raw_malloc) {
347 PyMem_RawFree(result);
348 }
349 else {
350 PyMem_Free(result);
351 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100352 if (error_pos != NULL) {
353 *error_pos = i;
354 }
355 if (reason) {
356 *reason = "encoding error";
357 }
358 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100359 }
360 }
361 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100362 *str = result;
363 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100364}
Victor Stinnerd500e532018-08-28 17:27:36 +0200365#else
/* Variant used when USE_FORCE_ASCII is not defined: the ASCII workaround
   does not exist in this build, so the flag is always 0. */
int
_Py_GetForceASCII(void)
{
    const int force = 0;
    return force;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100371
/* Variant used when USE_FORCE_ASCII is not defined: there is no cached
   flag to reset. */
void
_Py_ResetForceASCII(void)
{
    return;   /* nothing to do */
}
Victor Stinnere2510952019-05-02 11:28:57 -0400377#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100378
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100379
380#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
381static int
382decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200383 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100384{
385 wchar_t *res;
386 unsigned char *in;
387 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600388 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100389
Victor Stinner3d4226a2018-08-29 22:21:32 +0200390 int surrogateescape;
391 if (get_surrogateescape(errors, &surrogateescape) < 0) {
392 return -3;
393 }
394
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100395 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
396 return -1;
397 }
398 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
399 if (!res) {
400 return -1;
401 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100402
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100403 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100404 for (in = (unsigned char*)arg; *in; in++) {
405 unsigned char ch = *in;
406 if (ch < 128) {
407 *out++ = ch;
408 }
409 else {
410 if (!surrogateescape) {
411 PyMem_RawFree(res);
412 if (wlen) {
413 *wlen = in - (unsigned char*)arg;
414 }
415 if (reason) {
416 *reason = "decoding error";
417 }
418 return -2;
419 }
420 *out++ = 0xdc00 + ch;
421 }
422 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100423 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100424
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100425 if (wlen != NULL) {
426 *wlen = out - res;
427 }
428 *wstr = res;
429 return 0;
430}
431#endif /* !HAVE_MBRTOWC */
432
433static int
434decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200435 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000436{
437 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100438 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000439 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200440#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000441 unsigned char *in;
442 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000443 mbstate_t mbs;
444#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100445
Victor Stinner3d4226a2018-08-29 22:21:32 +0200446 int surrogateescape;
447 if (get_surrogateescape(errors, &surrogateescape) < 0) {
448 return -3;
449 }
450
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100451#ifdef HAVE_BROKEN_MBSTOWCS
452 /* Some platforms have a broken implementation of
453 * mbstowcs which does not count the characters that
454 * would result from conversion. Use an upper bound.
455 */
456 argsize = strlen(arg);
457#else
Victor Stinner99768342021-03-17 21:46:53 +0100458 argsize = _Py_mbstowcs(NULL, arg, 0);
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100459#endif
Victor Stinner99768342021-03-17 21:46:53 +0100460 if (argsize != DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100461 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
462 return -1;
463 }
464 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
465 if (!res) {
466 return -1;
467 }
468
Victor Stinner99768342021-03-17 21:46:53 +0100469 count = _Py_mbstowcs(res, arg, argsize + 1);
470 if (count != DECODE_ERROR) {
471 *wstr = res;
472 if (wlen != NULL) {
473 *wlen = count;
Victor Stinner168e1172010-10-16 23:16:16 +0000474 }
Victor Stinner99768342021-03-17 21:46:53 +0100475 return 0;
Victor Stinner4e314432010-10-07 21:45:39 +0000476 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200477 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000478 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100479
Victor Stinner4e314432010-10-07 21:45:39 +0000480 /* Conversion failed. Fall back to escaping with surrogateescape. */
481#ifdef HAVE_MBRTOWC
482 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
483
484 /* Overallocate; as multi-byte characters are in the argument, the
485 actual output could use less memory. */
486 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100487 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
488 return -1;
489 }
490 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
491 if (!res) {
492 return -1;
493 }
494
Victor Stinner4e314432010-10-07 21:45:39 +0000495 in = (unsigned char*)arg;
496 out = res;
497 memset(&mbs, 0, sizeof mbs);
498 while (argsize) {
Victor Stinner99768342021-03-17 21:46:53 +0100499 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100500 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000501 /* Reached end of string; null char stored. */
502 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100503 }
504
Victor Stinner99768342021-03-17 21:46:53 +0100505 if (converted == INCOMPLETE_CHARACTER) {
Victor Stinner4e314432010-10-07 21:45:39 +0000506 /* Incomplete character. This should never happen,
507 since we provide everything that we have -
508 unless there is a bug in the C library, or I
509 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100510 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000511 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100512
Victor Stinner99768342021-03-17 21:46:53 +0100513 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100514 if (!surrogateescape) {
515 goto decode_error;
516 }
517
Victor Stinner99768342021-03-17 21:46:53 +0100518 /* Decoding error. Escape as UTF-8b, and start over in the initial
519 shift state. */
Victor Stinner4e314432010-10-07 21:45:39 +0000520 *out++ = 0xdc00 + *in++;
521 argsize--;
522 memset(&mbs, 0, sizeof mbs);
523 continue;
524 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100525
Victor Stinner99768342021-03-17 21:46:53 +0100526 // _Py_mbrtowc() reject lone surrogate characters
527 assert(!Py_UNICODE_IS_SURROGATE(*out));
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100528
Victor Stinner4e314432010-10-07 21:45:39 +0000529 /* successfully converted some bytes */
530 in += converted;
531 argsize -= converted;
532 out++;
533 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100534 if (wlen != NULL) {
535 *wlen = out - res;
536 }
537 *wstr = res;
538 return 0;
539
540decode_error:
541 PyMem_RawFree(res);
542 if (wlen) {
543 *wlen = in - (unsigned char*)arg;
544 }
545 if (reason) {
546 *reason = "decoding error";
547 }
548 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100549#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000550 /* Cannot use C locale for escaping; manually escape as if charset
551 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
552 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200553 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100554#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100555}
556
557
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100558/* Decode a byte string from the locale encoding.
559
560 Use the strict error handler if 'surrogateescape' is zero. Use the
561 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
562 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
563 can be decoded as a surrogate character, escape the bytes using the
564 surrogateescape error handler instead of decoding them.
565
Ville Skyttä61f82e02018-04-20 23:08:45 +0300566 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100567 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
568 the number of wide characters excluding the null character into *wlen.
569
570 On memory allocation failure, return -1.
571
572 On decoding error, return -2. If wlen is not NULL, write the start of
573 invalid byte sequence in the input string into *wlen. If reason is not NULL,
574 write the decoding error message into *reason.
575
Victor Stinner3d4226a2018-08-29 22:21:32 +0200576 Return -3 if the error handler 'errors' is not supported.
577
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100578 Use the Py_EncodeLocaleEx() function to encode the character string back to
579 a byte string. */
580int
581_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
582 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200583 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100584{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100585 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400586#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100587 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200588 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100589#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200590 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100591#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100592 }
593
Victor Stinnere2510952019-05-02 11:28:57 -0400594#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100595 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200596 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100597#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200598 int use_utf8 = (Py_UTF8Mode == 1);
599#ifdef MS_WINDOWS
600 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
601#endif
602 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200603 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
604 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100605 }
606
607#ifdef USE_FORCE_ASCII
608 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100609 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100610 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100611
612 if (force_ascii) {
613 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200614 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100615 }
616#endif
617
Victor Stinner3d4226a2018-08-29 22:21:32 +0200618 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400619#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100620}
621
622
Victor Stinner91106cd2017-12-13 12:29:09 +0100623/* Decode a byte string from the locale encoding with the
624 surrogateescape error handler: undecodable bytes are decoded as characters
625 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
626 character, escape the bytes using the surrogateescape error handler instead
627 of decoding them.
628
629 Return a pointer to a newly allocated wide character string, use
630 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
631 wide characters excluding the null character into *size
632
633 Return NULL on decoding error or memory allocation error. If *size* is not
634 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
635 decoding error.
636
637 Decoding errors should never happen, unless there is a bug in the C
638 library.
639
640 Use the Py_EncodeLocale() function to encode the character string back to a
641 byte string. */
642wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100643Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100644{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100645 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200646 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
647 NULL, 0,
648 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100649 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200650 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100651 if (wlen != NULL) {
652 *wlen = (size_t)res;
653 }
654 return NULL;
655 }
656 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100657}
Victor Stinner91106cd2017-12-13 12:29:09 +0100658
Victor Stinner91106cd2017-12-13 12:29:09 +0100659
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100660static int
661encode_current_locale(const wchar_t *text, char **str,
662 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200663 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100664{
Victor Stinner4e314432010-10-07 21:45:39 +0000665 const size_t len = wcslen(text);
666 char *result = NULL, *bytes = NULL;
667 size_t i, size, converted;
668 wchar_t c, buf[2];
669
Victor Stinner3d4226a2018-08-29 22:21:32 +0200670 int surrogateescape;
671 if (get_surrogateescape(errors, &surrogateescape) < 0) {
672 return -3;
673 }
674
Victor Stinner4e314432010-10-07 21:45:39 +0000675 /* The function works in two steps:
676 1. compute the length of the output buffer in bytes (size)
677 2. outputs the bytes */
678 size = 0;
679 buf[1] = 0;
680 while (1) {
681 for (i=0; i < len; i++) {
682 c = text[i];
683 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100684 if (!surrogateescape) {
685 goto encode_error;
686 }
Victor Stinner4e314432010-10-07 21:45:39 +0000687 /* UTF-8b surrogate */
688 if (bytes != NULL) {
689 *bytes++ = c - 0xdc00;
690 size--;
691 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100692 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000693 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100694 }
Victor Stinner4e314432010-10-07 21:45:39 +0000695 continue;
696 }
697 else {
698 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100699 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000700 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100701 }
702 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000703 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100704 }
Victor Stinner99768342021-03-17 21:46:53 +0100705 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100706 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000707 }
708 if (bytes != NULL) {
709 bytes += converted;
710 size -= converted;
711 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100712 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000713 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100714 }
Victor Stinner4e314432010-10-07 21:45:39 +0000715 }
716 }
717 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100718 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000719 break;
720 }
721
722 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100723 if (raw_malloc) {
724 result = PyMem_RawMalloc(size);
725 }
726 else {
727 result = PyMem_Malloc(size);
728 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100729 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100730 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100731 }
Victor Stinner4e314432010-10-07 21:45:39 +0000732 bytes = result;
733 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100734 *str = result;
735 return 0;
736
737encode_error:
738 if (raw_malloc) {
739 PyMem_RawFree(result);
740 }
741 else {
742 PyMem_Free(result);
743 }
744 if (error_pos != NULL) {
745 *error_pos = i;
746 }
747 if (reason) {
748 *reason = "encoding error";
749 }
750 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100751}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100752
Victor Stinner3d4226a2018-08-29 22:21:32 +0200753
754/* Encode a string to the locale encoding.
755
756 Parameters:
757
758 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
759 of PyMem_Malloc().
760 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
761 Python filesystem encoding.
762 * errors: error handler like "strict" or "surrogateescape".
763
764 Return value:
765
766 0: success, *str is set to a newly allocated decoded string.
767 -1: memory allocation failure
768 -2: encoding error, set *error_pos and *reason (if set).
769 -3: the error handler 'errors' is not supported.
770 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100771static int
772encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
773 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200774 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100775{
776 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400777#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100778 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200779 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100780#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100781 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200782 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100783#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100784 }
785
Victor Stinnere2510952019-05-02 11:28:57 -0400786#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100787 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200788 raw_malloc, errors);
789#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200790 int use_utf8 = (Py_UTF8Mode == 1);
791#ifdef MS_WINDOWS
792 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
793#endif
794 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100795 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200796 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100797 }
798
799#ifdef USE_FORCE_ASCII
800 if (force_ascii == -1) {
801 force_ascii = check_force_ascii();
802 }
803
804 if (force_ascii) {
805 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200806 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100807 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100808#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100809
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100810 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200811 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400812#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100813}
814
Victor Stinner9dd76202017-12-21 16:20:32 +0100815static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100816encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100817 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100818{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100819 char *str;
820 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200821 raw_malloc, current_locale,
822 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100823 if (res != -2 && error_pos) {
824 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100825 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100826 if (res != 0) {
827 return NULL;
828 }
829 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100830}
831
/* Encode a wide character string to the locale encoding using the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string; release it with
   PyMem_Free(). Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or
   to the index of the invalid character on encoding error.

   Py_DecodeLocale() performs the reverse conversion, from the byte string
   back to a wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100849
Victor Stinner91106cd2017-12-13 12:29:09 +0100850
/* Same as Py_EncodeLocale(), except that the returned string must be
   released with PyMem_RawFree() rather than PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
858
859
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100860int
861_Py_EncodeLocaleEx(const wchar_t *text, char **str,
862 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200863 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100864{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100865 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200866 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000867}
868
Victor Stinner6672d0c2010-10-07 22:53:43 +0000869
Victor Stinner82458b62020-11-01 20:59:35 +0100870// Get the current locale encoding name:
871//
872// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
873// - Return "UTF-8" if the UTF-8 Mode is enabled
874// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100875// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100876// - Otherwise, return nl_langinfo(CODESET).
877//
Victor Stinnere662c392020-11-01 23:07:23 +0100878// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100879//
Victor Stinner710e8262020-10-31 01:02:09 +0100880// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100881wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100882_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100883{
884#ifdef _Py_FORCE_UTF8_LOCALE
885 // On Android langinfo.h and CODESET are missing,
886 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100887 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100888#else
889 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
890 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100891 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100892 }
893
Victor Stinner82458b62020-11-01 20:59:35 +0100894#ifdef MS_WINDOWS
895 wchar_t encoding[23];
896 unsigned int ansi_codepage = GetACP();
897 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
898 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
899 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100900#else
901 const char *encoding = nl_langinfo(CODESET);
902 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100903 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
904 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100905 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100906 }
Victor Stinner710e8262020-10-31 01:02:09 +0100907
Victor Stinner82458b62020-11-01 20:59:35 +0100908 wchar_t *wstr;
909 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100910 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100911 if (res < 0) {
912 return NULL;
913 }
914 return wstr;
915#endif // !MS_WINDOWS
916
917#endif // !_Py_FORCE_UTF8_LOCALE
918}
919
920
921PyObject *
922_Py_GetLocaleEncodingObject(void)
923{
Victor Stinnere662c392020-11-01 23:07:23 +0100924 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100925 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100926 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100927 return NULL;
928 }
929
930 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
931 PyMem_RawFree(encoding);
932 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100933}
934
Jakub Kulík9032cf52021-04-30 15:21:42 +0200935#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
936
/* Tell whether the current locale needs wchar_t <-> Unicode conversion.

   Return non-zero when nl_langinfo(CODESET) names a codeset other than
   "UTF-8" and "646"; return 0 for those two codesets or when
   nl_langinfo() returns NULL. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *name = nl_langinfo(CODESET);
    if (name == NULL) {
        return 0;
    }
    if (strcmp(name, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(name, "646") == 0) {
        return 0;
    }
    return 1;
}
950
951static wchar_t *
952_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
953 const char *tocode, const char *fromcode)
954{
955 Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
956
957 /* Ensure we won't overflow the size. */
958 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
959 PyErr_NoMemory();
960 return NULL;
961 }
962
963 /* the string doesn't have to be NULL terminated */
964 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
965 if (target == NULL) {
966 PyErr_NoMemory();
967 return NULL;
968 }
969
970 iconv_t cd = iconv_open(tocode, fromcode);
971 if (cd == (iconv_t)-1) {
972 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
973 PyMem_Free(target);
974 return NULL;
975 }
976
977 char *inbuf = (char *) source;
978 char *outbuf = (char *) target;
979 size_t inbytesleft = sizeof(wchar_t) * size;
980 size_t outbytesleft = inbytesleft;
981
982 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
983 if (ret == DECODE_ERROR) {
984 PyErr_Format(PyExc_ValueError, "iconv() failed");
985 PyMem_Free(target);
986 iconv_close(cd);
987 return NULL;
988 }
989
990 iconv_close(cd);
991 return target;
992}
993
994/* Convert a wide character string to the UCS-4 encoded string. This
995 is necessary on systems where internal form of wchar_t are not Unicode
996 code points (e.g. Oracle Solaris).
997
998 Return a pointer to a newly allocated string, use PyMem_Free() to free
999 the memory. Return NULL and raise exception on conversion or memory
1000 allocation error. */
1001wchar_t *
1002_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1003{
1004 return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1005}
1006
1007/* Convert a UCS-4 encoded string to native wide character string. This
1008 is necessary on systems where internal form of wchar_t are not Unicode
1009 code points (e.g. Oracle Solaris).
1010
1011 The conversion is done in place. This can be done because both wchar_t
1012 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1013 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1014 which is currently the only system using these functions; it doesn't have
1015 to be for other systems).
1016
1017 Return 0 on success. Return -1 and raise exception on conversion
1018 or memory allocation error. */
1019int
1020_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1021{
1022 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1023 if (!result) {
1024 return -1;
1025 }
1026 memcpy(unicode, result, size * sizeof(wchar_t));
1027 PyMem_Free(result);
1028 return 0;
1029}
1030#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
Victor Stinner710e8262020-10-31 01:02:09 +01001031
Steve Dowerf2f373f2015-02-21 08:44:05 -08001032#ifdef MS_WINDOWS
1033static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1034
1035static void
1036FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1037{
1038 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1039 /* Cannot simply cast and dereference in_ptr,
1040 since it might not be aligned properly */
1041 __int64 in;
1042 memcpy(&in, in_ptr, sizeof(in));
1043 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1044 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1045}
1046
1047void
Steve Dowerbf1f3762015-02-21 15:26:02 -08001048_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -08001049{
1050 /* XXX endianness */
1051 __int64 out;
1052 out = time_in + secs_between_epochs;
1053 out = out * 10000000 + nsec_in / 100;
1054 memcpy(out_ptr, &out, sizeof(out));
1055}
1056
1057/* Below, we *know* that ugo+r is 0444 */
1058#if _S_IREAD != 0400
1059#error Unsupported C library
1060#endif
1061static int
1062attributes_to_mode(DWORD attr)
1063{
1064 int m = 0;
1065 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1066 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1067 else
1068 m |= _S_IFREG;
1069 if (attr & FILE_ATTRIBUTE_READONLY)
1070 m |= 0444;
1071 else
1072 m |= 0666;
1073 return m;
1074}
1075
Steve Dowerbf1f3762015-02-21 15:26:02 -08001076void
Victor Stinnere134a7f2015-03-30 10:09:31 +02001077_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1078 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -08001079{
1080 memset(result, 0, sizeof(*result));
1081 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1082 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1083 result->st_dev = info->dwVolumeSerialNumber;
1084 result->st_rdev = result->st_dev;
1085 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1086 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1087 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1088 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +01001089 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -07001090 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1091 open other name surrogate reparse points without traversing them. To
1092 detect/handle these, check st_file_attributes and st_reparse_tag. */
1093 result->st_reparse_tag = reparse_tag;
1094 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1095 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -08001096 /* first clear the S_IFMT bits */
1097 result->st_mode ^= (result->st_mode & S_IFMT);
1098 /* now set the bits that make this a symlink */
1099 result->st_mode |= S_IFLNK;
1100 }
1101 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -08001102}
1103#endif
1104
/* Get information about an open file, without raising an exception.

   On POSIX, this is a plain fstat() call.

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   because they support files larger than 2 GiB. fstat() may fail with
   EOVERFLOW on files larger than 2 GiB because the file size type is a
   signed 32-bit integer: see issue #23152.

   Fill status and return 0 on success. Return nonzero on error: on Windows
   the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE os_handle = _Py_get_osfhandle_noraise(fd);
    if (os_handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    const int file_type = GetFileType(os_handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        /* FILE_TYPE_UNKNOWN with a nonzero last error is a failure;
           with a zero last error it is a valid but unknown file. */
        const DWORD last_error = GetLastError();
        if (last_error != 0) {
            errno = winerror_to_errno(last_error);
            return -1;
        }
    }

    /* Only disk files carry further information; for every other type,
       at most st_mode is set. */
    switch (file_type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(os_handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001168
Victor Stinnere134a7f2015-03-30 10:09:31 +02001169/* Return information about a file.
1170
1171 On POSIX, use fstat().
1172
1173 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001174 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1175 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001176 #23152.
1177
1178 Raise an exception and return -1 on error. On Windows, set the last Windows
1179 error on error. On POSIX, set errno on error. Fill status and return 0 on
1180 success.
1181
Victor Stinner6f4fae82015-04-01 18:34:32 +02001182 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1183 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001184int
1185_Py_fstat(int fd, struct _Py_stat_struct *status)
1186{
1187 int res;
1188
Victor Stinner8a1be612016-03-14 22:07:55 +01001189 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001190
Victor Stinnere134a7f2015-03-30 10:09:31 +02001191 Py_BEGIN_ALLOW_THREADS
1192 res = _Py_fstat_noraise(fd, status);
1193 Py_END_ALLOW_THREADS
1194
1195 if (res != 0) {
1196#ifdef MS_WINDOWS
1197 PyErr_SetFromWindowsErr(0);
1198#else
1199 PyErr_SetFromErrno(PyExc_OSError);
1200#endif
1201 return -1;
1202 }
1203 return 0;
1204}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001205
Victor Stinner6672d0c2010-10-07 22:53:43 +00001206/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1207 call stat() otherwise. Only fill st_mode attribute on Windows.
1208
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001209 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1210 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001211
1212int
Victor Stinnera4a75952010-10-07 22:23:10 +00001213_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001214{
1215#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001216 int err;
1217 struct _stat wstatbuf;
1218
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001219#if USE_UNICODE_WCHAR_CACHE
1220 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1221#else /* USE_UNICODE_WCHAR_CACHE */
1222 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1223#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001224 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001225 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001226
Victor Stinneree587ea2011-11-17 00:51:38 +01001227 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001228 if (!err)
1229 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001230#if !USE_UNICODE_WCHAR_CACHE
1231 PyMem_Free(wpath);
1232#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001233 return err;
1234#else
1235 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001236 PyObject *bytes;
1237 char *cpath;
1238
1239 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001240 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001241 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001242
1243 /* check for embedded null bytes */
1244 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1245 Py_DECREF(bytes);
1246 return -2;
1247 }
1248
1249 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001250 Py_DECREF(bytes);
1251 return ret;
1252#endif
1253}
1254
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001255
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001256/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001257static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001258get_inheritable(int fd, int raise)
1259{
1260#ifdef MS_WINDOWS
1261 HANDLE handle;
1262 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001263
Segev Finer5e437fb2021-04-24 01:00:27 +03001264 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001265 if (handle == INVALID_HANDLE_VALUE) {
1266 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001267 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001268 return -1;
1269 }
1270
1271 if (!GetHandleInformation(handle, &flags)) {
1272 if (raise)
1273 PyErr_SetFromWindowsErr(0);
1274 return -1;
1275 }
1276
1277 return (flags & HANDLE_FLAG_INHERIT);
1278#else
1279 int flags;
1280
1281 flags = fcntl(fd, F_GETFD, 0);
1282 if (flags == -1) {
1283 if (raise)
1284 PyErr_SetFromErrno(PyExc_OSError);
1285 return -1;
1286 }
1287 return !(flags & FD_CLOEXEC);
1288#endif
1289}
1290
/* Get the inheritable flag of the specified file descriptor.
   Return a non-zero value if the file descriptor can be inherited and 0 if
   it cannot; on error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1299
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001300
1301/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001302static int
1303set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1304{
1305#ifdef MS_WINDOWS
1306 HANDLE handle;
1307 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001308#else
1309#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1310 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001311 int request;
1312 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001313#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001314 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001315 int res;
1316#endif
1317
1318 /* atomic_flag_works can only be used to make the file descriptor
1319 non-inheritable */
1320 assert(!(atomic_flag_works != NULL && inheritable));
1321
1322 if (atomic_flag_works != NULL && !inheritable) {
1323 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001324 int isInheritable = get_inheritable(fd, raise);
1325 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001326 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001327 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001328 }
1329
1330 if (*atomic_flag_works)
1331 return 0;
1332 }
1333
1334#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03001335 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001336 if (handle == INVALID_HANDLE_VALUE) {
1337 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001338 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001339 return -1;
1340 }
1341
1342 if (inheritable)
1343 flags = HANDLE_FLAG_INHERIT;
1344 else
1345 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001346
1347 /* This check can be removed once support for Windows 7 ends. */
1348#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1349 GetFileType(handle) == FILE_TYPE_CHAR)
1350
1351 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1352 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001353 if (raise)
1354 PyErr_SetFromWindowsErr(0);
1355 return -1;
1356 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001357#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001358 return 0;
1359
Victor Stinnerdaf45552013-08-28 00:53:59 +02001360#else
Victor Stinner282124b2014-09-02 11:41:04 +02001361
1362#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001363 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001364 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001365 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1366 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001367 if (inheritable)
1368 request = FIONCLEX;
1369 else
1370 request = FIOCLEX;
1371 err = ioctl(fd, request, NULL);
1372 if (!err) {
1373 ioctl_works = 1;
1374 return 0;
1375 }
1376
cptpcrd7dc71c42021-01-20 09:05:51 -05001377#ifdef __linux__
1378 if (errno == EBADF) {
1379 // On Linux, ioctl(FIOCLEX) will fail with EBADF for O_PATH file descriptors
1380 // Fall through to the fcntl() path
1381 }
1382 else
1383#endif
Victor Stinner3116cc42016-05-19 16:46:18 +02001384 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001385 if (raise)
1386 PyErr_SetFromErrno(PyExc_OSError);
1387 return -1;
1388 }
1389 else {
1390 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1391 device". The ioctl is declared but not supported by the kernel.
1392 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001393 Illumos-based OS for example.
1394
1395 Issue #27057: When SELinux policy disallows ioctl it will fail
1396 with EACCES. While FIOCLEX is safe operation it may be
1397 unavailable because ioctl was denied altogether.
1398 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001399 ioctl_works = 0;
1400 }
1401 /* fallback to fcntl() if ioctl() does not work */
1402 }
1403#endif
1404
1405 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001406 flags = fcntl(fd, F_GETFD);
1407 if (flags < 0) {
1408 if (raise)
1409 PyErr_SetFromErrno(PyExc_OSError);
1410 return -1;
1411 }
1412
Victor Stinnera858bbd2016-04-17 16:51:52 +02001413 if (inheritable) {
1414 new_flags = flags & ~FD_CLOEXEC;
1415 }
1416 else {
1417 new_flags = flags | FD_CLOEXEC;
1418 }
1419
1420 if (new_flags == flags) {
1421 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1422 return 0;
1423 }
1424
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001425 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001426 if (res < 0) {
1427 if (raise)
1428 PyErr_SetFromErrno(PyExc_OSError);
1429 return -1;
1430 }
1431 return 0;
1432#endif
1433}
1434
/* Make the file descriptor non-inheritable, without raising an exception
   and without using an atomic flag cache.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1442
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. Pass NULL as well if
   no atomic flag was used to create the file descriptor.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the file descriptor: if it is already
      non-inheritable, set *atomic_flag_works to 1 and do nothing else;
      otherwise set *atomic_flag_works to 0 and make the file descriptor
      non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1464
/* Variant of _Py_set_inheritable() that never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1473
Victor Stinnera555cfc2015-03-18 00:22:14 +01001474static int
1475_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001476{
1477 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001478 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001479#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001480 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001481#endif
1482
1483#ifdef MS_WINDOWS
1484 flags |= O_NOINHERIT;
1485#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001486 atomic_flag_works = &_Py_open_cloexec_works;
1487 flags |= O_CLOEXEC;
1488#else
1489 atomic_flag_works = NULL;
1490#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001491
Victor Stinnera555cfc2015-03-18 00:22:14 +01001492 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001493 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1494 if (pathname_obj == NULL) {
1495 return -1;
1496 }
1497 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1498 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001499 return -1;
1500 }
1501
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001502 do {
1503 Py_BEGIN_ALLOW_THREADS
1504 fd = open(pathname, flags);
1505 Py_END_ALLOW_THREADS
1506 } while (fd < 0
1507 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001508 if (async_err) {
1509 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001510 return -1;
1511 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001512 if (fd < 0) {
1513 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1514 Py_DECREF(pathname_obj);
1515 return -1;
1516 }
1517 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001518 }
1519 else {
1520 fd = open(pathname, flags);
1521 if (fd < 0)
1522 return -1;
1523 }
1524
1525#ifndef MS_WINDOWS
1526 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001527 close(fd);
1528 return -1;
1529 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001530#endif
1531
Victor Stinnerdaf45552013-08-28 00:53:59 +02001532 return fd;
1533}
1534
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1552
/* Open a file with the specified flags (wrapper to open() function),
   without raising an exception.
   Return a file descriptor on success. On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1564
Victor Stinnerdaf45552013-08-28 00:53:59 +02001565/* Open a file. Use _wfopen() on Windows, encode the path to the locale
Victor Stinnere42ccd22015-03-18 01:39:23 +01001566 encoding and use fopen() otherwise.
1567
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001568 The file descriptor is created non-inheritable.
1569
1570 If interrupted by a signal, fail with EINTR. */
Victor Stinner4e314432010-10-07 21:45:39 +00001571FILE *
1572_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1573{
Victor Stinner4e314432010-10-07 21:45:39 +00001574 FILE *f;
Steve Dowerb82e17e2019-05-23 08:45:22 -07001575 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1576 return NULL;
1577 }
Victor Stinnerdaf45552013-08-28 00:53:59 +02001578#ifndef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001579 char *cpath;
1580 char cmode[10];
1581 size_t r;
1582 r = wcstombs(cmode, mode, 10);
Victor Stinner99768342021-03-17 21:46:53 +01001583 if (r == DECODE_ERROR || r >= 10) {
Victor Stinner4e314432010-10-07 21:45:39 +00001584 errno = EINVAL;
1585 return NULL;
1586 }
Victor Stinner9dd76202017-12-21 16:20:32 +01001587 cpath = _Py_EncodeLocaleRaw(path, NULL);
1588 if (cpath == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001589 return NULL;
Victor Stinner9dd76202017-12-21 16:20:32 +01001590 }
Victor Stinner4e314432010-10-07 21:45:39 +00001591 f = fopen(cpath, cmode);
Victor Stinner9dd76202017-12-21 16:20:32 +01001592 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001593#else
Victor Stinnerdaf45552013-08-28 00:53:59 +02001594 f = _wfopen(path, mode);
Victor Stinner4e314432010-10-07 21:45:39 +00001595#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001596 if (f == NULL)
1597 return NULL;
1598 if (make_non_inheritable(fileno(f)) < 0) {
1599 fclose(f);
1600 return NULL;
1601 }
1602 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001603}
1604
Victor Stinnerdaf45552013-08-28 00:53:59 +02001605
1606/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001607 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001608
Victor Stinnere42ccd22015-03-18 01:39:23 +01001609 Return the new file object on success. Raise an exception and return NULL
1610 on error.
1611
1612 The file descriptor is created non-inheritable.
1613
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001614 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1615 except if the Python signal handler raises an exception.
1616
Victor Stinner6f4fae82015-04-01 18:34:32 +02001617 Release the GIL to call _wfopen() or fopen(). The caller must hold
1618 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001619FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001620_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001621{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001622 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001623 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001624#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001625 wchar_t wmode[10];
1626 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001627
Victor Stinnere42ccd22015-03-18 01:39:23 +01001628 assert(PyGILState_Check());
1629
Steve Dowerb82e17e2019-05-23 08:45:22 -07001630 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1631 return NULL;
1632 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001633 if (!PyUnicode_Check(path)) {
1634 PyErr_Format(PyExc_TypeError,
1635 "str file path expected under Windows, got %R",
1636 Py_TYPE(path));
1637 return NULL;
1638 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001639#if USE_UNICODE_WCHAR_CACHE
1640 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1641#else /* USE_UNICODE_WCHAR_CACHE */
1642 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1643#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001644 if (wpath == NULL)
1645 return NULL;
1646
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001647 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1648 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001649 if (usize == 0) {
1650 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001651#if !USE_UNICODE_WCHAR_CACHE
1652 PyMem_Free(wpath);
1653#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001654 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001655 }
Victor Stinner4e314432010-10-07 21:45:39 +00001656
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001657 do {
1658 Py_BEGIN_ALLOW_THREADS
1659 f = _wfopen(wpath, wmode);
1660 Py_END_ALLOW_THREADS
1661 } while (f == NULL
1662 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001663#if !USE_UNICODE_WCHAR_CACHE
1664 PyMem_Free(wpath);
1665#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001666#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001667 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001668 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001669
1670 assert(PyGILState_Check());
1671
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001672 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001673 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001674 path_bytes = PyBytes_AS_STRING(bytes);
1675
Steve Dowerb82e17e2019-05-23 08:45:22 -07001676 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001677 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001678 return NULL;
1679 }
1680
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001681 do {
1682 Py_BEGIN_ALLOW_THREADS
1683 f = fopen(path_bytes, mode);
1684 Py_END_ALLOW_THREADS
1685 } while (f == NULL
1686 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001687
Victor Stinner4e314432010-10-07 21:45:39 +00001688 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001689#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001690 if (async_err)
1691 return NULL;
1692
Victor Stinnere42ccd22015-03-18 01:39:23 +01001693 if (f == NULL) {
1694 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001695 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001696 }
1697
1698 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001699 fclose(f);
1700 return NULL;
1701 }
1702 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001703}
1704
Victor Stinner66aab0c2015-03-19 22:53:20 +01001705/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001706
1707 On success, return the number of read bytes, it can be lower than count.
1708 If the current file offset is at or past the end of file, no bytes are read,
1709 and read() returns zero.
1710
1711 On error, raise an exception, set errno and return -1.
1712
1713 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1714 If the Python signal handler raises an exception, the function returns -1
1715 (the syscall is not retried).
1716
1717 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001718Py_ssize_t
1719_Py_read(int fd, void *buf, size_t count)
1720{
1721 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001722 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001723 int async_err = 0;
1724
Victor Stinner8a1be612016-03-14 22:07:55 +01001725 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001726
Victor Stinner66aab0c2015-03-19 22:53:20 +01001727 /* _Py_read() must not be called with an exception set, otherwise the
1728 * caller may think that read() was interrupted by a signal and the signal
1729 * handler raised an exception. */
1730 assert(!PyErr_Occurred());
1731
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001732 if (count > _PY_READ_MAX) {
1733 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001734 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001735
Steve Dower8fc89802015-04-12 00:26:27 -04001736 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001737 do {
1738 Py_BEGIN_ALLOW_THREADS
1739 errno = 0;
1740#ifdef MS_WINDOWS
1741 n = read(fd, buf, (int)count);
1742#else
1743 n = read(fd, buf, count);
1744#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001745 /* save/restore errno because PyErr_CheckSignals()
1746 * and PyErr_SetFromErrno() can modify it */
1747 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001748 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001749 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001750 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001751 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001752
1753 if (async_err) {
1754 /* read() was interrupted by a signal (failed with EINTR)
1755 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001756 errno = err;
1757 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001758 return -1;
1759 }
1760 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001761 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001762 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001763 return -1;
1764 }
1765
1766 return n;
1767}
1768
Victor Stinner82c3e452015-04-01 18:34:45 +02001769static Py_ssize_t
1770_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001771{
1772 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001773 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001774 int async_err = 0;
1775
Steve Dower8fc89802015-04-12 00:26:27 -04001776 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001777#ifdef MS_WINDOWS
1778 if (count > 32767 && isatty(fd)) {
1779 /* Issue #11395: the Windows console returns an error (12: not
1780 enough space error) on writing into stdout if stdout mode is
1781 binary and the length is greater than 66,000 bytes (or less,
1782 depending on heap usage). */
1783 count = 32767;
1784 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001785#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001786 if (count > _PY_WRITE_MAX) {
1787 count = _PY_WRITE_MAX;
1788 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001789
Victor Stinner82c3e452015-04-01 18:34:45 +02001790 if (gil_held) {
1791 do {
1792 Py_BEGIN_ALLOW_THREADS
1793 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001794#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001795 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001796#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001797 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001798#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001799 /* save/restore errno because PyErr_CheckSignals()
1800 * and PyErr_SetFromErrno() can modify it */
1801 err = errno;
1802 Py_END_ALLOW_THREADS
1803 } while (n < 0 && err == EINTR &&
1804 !(async_err = PyErr_CheckSignals()));
1805 }
1806 else {
1807 do {
1808 errno = 0;
1809#ifdef MS_WINDOWS
1810 n = write(fd, buf, (int)count);
1811#else
1812 n = write(fd, buf, count);
1813#endif
1814 err = errno;
1815 } while (n < 0 && err == EINTR);
1816 }
Steve Dower8fc89802015-04-12 00:26:27 -04001817 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001818
1819 if (async_err) {
1820 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001821 and the Python signal handler raised an exception (if gil_held is
1822 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001823 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001824 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001825 return -1;
1826 }
1827 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001828 if (gil_held)
1829 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001830 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001831 return -1;
1832 }
1833
1834 return n;
1835}
1836
Victor Stinner82c3e452015-04-01 18:34:45 +02001837/* Write count bytes of buf into fd.
1838
1839 On success, return the number of written bytes, it can be lower than count
1840 including 0. On error, raise an exception, set errno and return -1.
1841
1842 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1843 If the Python signal handler raises an exception, the function returns -1
1844 (the syscall is not retried).
1845
1846 Release the GIL to call write(). The caller must hold the GIL. */
1847Py_ssize_t
1848_Py_write(int fd, const void *buf, size_t count)
1849{
Victor Stinner8a1be612016-03-14 22:07:55 +01001850 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001851
Victor Stinner82c3e452015-04-01 18:34:45 +02001852 /* _Py_write() must not be called with an exception set, otherwise the
1853 * caller may think that write() was interrupted by a signal and the signal
1854 * handler raised an exception. */
1855 assert(!PyErr_Occurred());
1856
1857 return _Py_write_impl(fd, buf, count, 1);
1858}
1859
1860/* Write count bytes of buf into fd.
1861 *
1862 * On success, return the number of written bytes, it can be lower than count
1863 * including 0. On error, set errno and return -1.
1864 *
1865 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1866 * without calling the Python signal handler. */
1867Py_ssize_t
1868_Py_write_noraise(int fd, const void *buf, size_t count)
1869{
1870 return _Py_write_impl(fd, buf, count, 0);
1871}
1872
Victor Stinner4e314432010-10-07 21:45:39 +00001873#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +00001874
1875/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinner1be0d112019-03-18 17:47:26 +01001876 the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001877
Victor Stinner1be0d112019-03-18 17:47:26 +01001878 Return -1 on encoding error, on readlink() error, if the internal buffer is
1879 too short, on decoding error, or if 'buf' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001880int
Victor Stinner1be0d112019-03-18 17:47:26 +01001881_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001882{
1883 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001884 char cbuf[MAXPATHLEN];
Victor Stinner03a8a562019-10-04 02:22:39 +02001885 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
Victor Stinner3f711f42010-10-16 22:47:37 +00001886 wchar_t *wbuf;
Victor Stinner03a8a562019-10-04 02:22:39 +02001887 Py_ssize_t res;
Victor Stinner4e314432010-10-07 21:45:39 +00001888 size_t r1;
1889
Victor Stinner9dd76202017-12-21 16:20:32 +01001890 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001891 if (cpath == NULL) {
1892 errno = EINVAL;
1893 return -1;
1894 }
Victor Stinner03a8a562019-10-04 02:22:39 +02001895 res = readlink(cpath, cbuf, cbuf_len);
Victor Stinner9dd76202017-12-21 16:20:32 +01001896 PyMem_RawFree(cpath);
Victor Stinner03a8a562019-10-04 02:22:39 +02001897 if (res == -1) {
Victor Stinner4e314432010-10-07 21:45:39 +00001898 return -1;
Victor Stinner03a8a562019-10-04 02:22:39 +02001899 }
1900 if ((size_t)res == cbuf_len) {
Victor Stinner4e314432010-10-07 21:45:39 +00001901 errno = EINVAL;
1902 return -1;
1903 }
1904 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001905 wbuf = Py_DecodeLocale(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +00001906 if (wbuf == NULL) {
1907 errno = EINVAL;
1908 return -1;
1909 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001910 /* wbuf must have space to store the trailing NUL character */
1911 if (buflen <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001912 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001913 errno = EINVAL;
1914 return -1;
1915 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001916 wcsncpy(buf, wbuf, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001917 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001918 return (int)r1;
1919}
1920#endif
1921
1922#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001923
1924/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001925 encoding, decode the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001926
Victor Stinner1be0d112019-03-18 17:47:26 +01001927 Return NULL on encoding error, realpath() error, decoding error
1928 or if 'resolved_path' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001929wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001930_Py_wrealpath(const wchar_t *path,
Victor Stinner1be0d112019-03-18 17:47:26 +01001931 wchar_t *resolved_path, size_t resolved_path_len)
Victor Stinner4e314432010-10-07 21:45:39 +00001932{
1933 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001934 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001935 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001936 char *res;
1937 size_t r;
Victor Stinner9dd76202017-12-21 16:20:32 +01001938 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001939 if (cpath == NULL) {
1940 errno = EINVAL;
1941 return NULL;
1942 }
1943 res = realpath(cpath, cresolved_path);
Victor Stinner9dd76202017-12-21 16:20:32 +01001944 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001945 if (res == NULL)
1946 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001947
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001948 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001949 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001950 errno = EINVAL;
1951 return NULL;
1952 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001953 /* wresolved_path must have space to store the trailing NUL character */
1954 if (resolved_path_len <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001955 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001956 errno = EINVAL;
1957 return NULL;
1958 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001959 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001960 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001961 return resolved_path;
1962}
1963#endif
1964
Victor Stinner3939c322019-06-25 15:02:43 +02001965
1966#ifndef MS_WINDOWS
1967int
1968_Py_isabs(const wchar_t *path)
1969{
1970 return (path[0] == SEP);
1971}
1972#endif
1973
1974
1975/* Get an absolute path.
1976 On error (ex: fail to get the current directory), return -1.
1977 On memory allocation failure, set *abspath_p to NULL and return 0.
1978 On success, return a newly allocated to *abspath_p to and return 0.
1979 The string must be freed by PyMem_RawFree(). */
1980int
1981_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1982{
1983#ifdef MS_WINDOWS
1984 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1985 DWORD result;
1986
1987 result = GetFullPathNameW(path,
1988 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1989 NULL);
1990 if (!result) {
1991 return -1;
1992 }
1993
1994 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1995 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1996 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1997 }
1998 else {
1999 woutbufp = NULL;
2000 }
2001 if (!woutbufp) {
2002 *abspath_p = NULL;
2003 return 0;
2004 }
2005
2006 result = GetFullPathNameW(path, result, woutbufp, NULL);
2007 if (!result) {
2008 PyMem_RawFree(woutbufp);
2009 return -1;
2010 }
2011 }
2012
2013 if (woutbufp != woutbuf) {
2014 *abspath_p = woutbufp;
2015 return 0;
2016 }
2017
2018 *abspath_p = _PyMem_RawWcsdup(woutbufp);
2019 return 0;
2020#else
2021 if (_Py_isabs(path)) {
2022 *abspath_p = _PyMem_RawWcsdup(path);
2023 return 0;
2024 }
2025
2026 wchar_t cwd[MAXPATHLEN + 1];
2027 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2028 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2029 /* unable to get the current directory */
2030 return -1;
2031 }
2032
2033 size_t cwd_len = wcslen(cwd);
2034 size_t path_len = wcslen(path);
2035 size_t len = cwd_len + 1 + path_len + 1;
2036 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2037 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2038 }
2039 else {
2040 *abspath_p = NULL;
2041 }
2042 if (*abspath_p == NULL) {
2043 return 0;
2044 }
2045
2046 wchar_t *abspath = *abspath_p;
2047 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2048 abspath += cwd_len;
2049
2050 *abspath = (wchar_t)SEP;
2051 abspath++;
2052
2053 memcpy(abspath, path, path_len * sizeof(wchar_t));
2054 abspath += path_len;
2055
2056 *abspath = 0;
2057 return 0;
2058#endif
2059}
2060
2061
Victor Stinnerfaddaed2019-03-19 02:58:14 +01002062/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01002063 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00002064
Victor Stinner1be0d112019-03-18 17:47:26 +01002065 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2066 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00002067wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01002068_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00002069{
2070#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01002071 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2072 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00002073#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01002074 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00002075 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00002076 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00002077
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01002078 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00002079 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02002080 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00002081 if (wname == NULL)
2082 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01002083 /* wname must have space to store the trailing NUL character */
2084 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02002085 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00002086 return NULL;
2087 }
Victor Stinner1be0d112019-03-18 17:47:26 +01002088 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02002089 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00002090 return buf;
2091#endif
2092}
2093
Victor Stinnerdaf45552013-08-28 00:53:59 +02002094/* Duplicate a file descriptor. The new file descriptor is created as
2095 non-inheritable. Return a new file descriptor on success, raise an OSError
2096 exception and return -1 on error.
2097
2098 The GIL is released to call dup(). The caller must hold the GIL. */
2099int
2100_Py_dup(int fd)
2101{
2102#ifdef MS_WINDOWS
2103 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002104#endif
2105
Victor Stinner8a1be612016-03-14 22:07:55 +01002106 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01002107
Victor Stinnerdaf45552013-08-28 00:53:59 +02002108#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03002109 handle = _Py_get_osfhandle(fd);
2110 if (handle == INVALID_HANDLE_VALUE)
Victor Stinnerdaf45552013-08-28 00:53:59 +02002111 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002112
Victor Stinnerdaf45552013-08-28 00:53:59 +02002113 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002114 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002115 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002116 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002117 Py_END_ALLOW_THREADS
2118 if (fd < 0) {
2119 PyErr_SetFromErrno(PyExc_OSError);
2120 return -1;
2121 }
2122
Zackery Spytz28fca0c2019-06-17 01:17:14 -06002123 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2124 _Py_BEGIN_SUPPRESS_IPH
2125 close(fd);
2126 _Py_END_SUPPRESS_IPH
2127 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002128 }
2129#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2130 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002131 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002132 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002133 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002134 Py_END_ALLOW_THREADS
2135 if (fd < 0) {
2136 PyErr_SetFromErrno(PyExc_OSError);
2137 return -1;
2138 }
2139
2140#else
2141 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002142 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002143 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002144 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002145 Py_END_ALLOW_THREADS
2146 if (fd < 0) {
2147 PyErr_SetFromErrno(PyExc_OSError);
2148 return -1;
2149 }
2150
2151 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002152 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002153 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002154 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002155 return -1;
2156 }
2157#endif
2158 return fd;
2159}
2160
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002161#ifndef MS_WINDOWS
2162/* Get the blocking mode of the file descriptor.
2163 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2164 raise an exception and return -1 on error. */
2165int
2166_Py_get_blocking(int fd)
2167{
Steve Dower8fc89802015-04-12 00:26:27 -04002168 int flags;
2169 _Py_BEGIN_SUPPRESS_IPH
2170 flags = fcntl(fd, F_GETFL, 0);
2171 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002172 if (flags < 0) {
2173 PyErr_SetFromErrno(PyExc_OSError);
2174 return -1;
2175 }
2176
2177 return !(flags & O_NONBLOCK);
2178}
2179
2180/* Set the blocking mode of the specified file descriptor.
2181
2182 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2183 otherwise.
2184
2185 Return 0 on success, raise an exception and return -1 on error. */
2186int
2187_Py_set_blocking(int fd, int blocking)
2188{
pxinwr06afac62020-12-08 04:41:12 +08002189/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2190 Use fcntl() instead. */
2191#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002192 int arg = !blocking;
2193 if (ioctl(fd, FIONBIO, &arg) < 0)
2194 goto error;
2195#else
2196 int flags, res;
2197
Steve Dower8fc89802015-04-12 00:26:27 -04002198 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002199 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002200 if (flags >= 0) {
2201 if (blocking)
2202 flags = flags & (~O_NONBLOCK);
2203 else
2204 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002205
Steve Dower8fc89802015-04-12 00:26:27 -04002206 res = fcntl(fd, F_SETFL, flags);
2207 } else {
2208 res = -1;
2209 }
2210 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002211
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002212 if (res < 0)
2213 goto error;
2214#endif
2215 return 0;
2216
2217error:
2218 PyErr_SetFromErrno(PyExc_OSError);
2219 return -1;
2220}
Segev Finer5e437fb2021-04-24 01:00:27 +03002221#else /* MS_WINDOWS */
2222void*
2223_Py_get_osfhandle_noraise(int fd)
2224{
2225 void *handle;
2226 _Py_BEGIN_SUPPRESS_IPH
2227 handle = (void*)_get_osfhandle(fd);
2228 _Py_END_SUPPRESS_IPH
2229 return handle;
2230}
Victor Stinnercb064fc2018-01-15 15:58:02 +01002231
Segev Finer5e437fb2021-04-24 01:00:27 +03002232void*
2233_Py_get_osfhandle(int fd)
2234{
2235 void *handle = _Py_get_osfhandle_noraise(fd);
2236 if (handle == INVALID_HANDLE_VALUE)
2237 PyErr_SetFromErrno(PyExc_OSError);
2238
2239 return handle;
2240}
2241
2242int
2243_Py_open_osfhandle_noraise(void *handle, int flags)
2244{
2245 int fd;
2246 _Py_BEGIN_SUPPRESS_IPH
2247 fd = _open_osfhandle((intptr_t)handle, flags);
2248 _Py_END_SUPPRESS_IPH
2249 return fd;
2250}
2251
2252int
2253_Py_open_osfhandle(void *handle, int flags)
2254{
2255 int fd = _Py_open_osfhandle_noraise(handle, flags);
2256 if (fd == -1)
2257 PyErr_SetFromErrno(PyExc_OSError);
2258
2259 return fd;
2260}
2261#endif /* MS_WINDOWS */
Victor Stinnercb064fc2018-01-15 15:58:02 +01002262
2263int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002264_Py_GetLocaleconvNumeric(struct lconv *lc,
2265 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002266{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002267 assert(decimal_point != NULL);
2268 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002269
TIGirardif2312032020-10-20 08:39:52 -03002270#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002271 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002272 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002273 change_locale = 1;
2274 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002275 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002276 change_locale = 1;
2277 }
2278
2279 /* Keep a copy of the LC_CTYPE locale */
2280 char *oldloc = NULL, *loc = NULL;
2281 if (change_locale) {
2282 oldloc = setlocale(LC_CTYPE, NULL);
2283 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002284 PyErr_SetString(PyExc_RuntimeWarning,
2285 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002286 return -1;
2287 }
2288
2289 oldloc = _PyMem_Strdup(oldloc);
2290 if (!oldloc) {
2291 PyErr_NoMemory();
2292 return -1;
2293 }
2294
2295 loc = setlocale(LC_NUMERIC, NULL);
2296 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2297 loc = NULL;
2298 }
2299
2300 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002301 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002302 if LC_NUMERIC locale is different than LC_CTYPE locale and
2303 decimal_point and/or thousands_sep are non-ASCII or longer than
2304 1 byte */
2305 setlocale(LC_CTYPE, loc);
2306 }
2307 }
2308
TIGirardif2312032020-10-20 08:39:52 -03002309#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2310#else /* MS_WINDOWS */
2311/* Use _W_* fields of Windows strcut lconv */
2312#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2313#endif /* MS_WINDOWS */
2314
Victor Stinner02e6bf72018-11-20 16:20:16 +01002315 int res = -1;
2316
TIGirardif2312032020-10-20 08:39:52 -03002317 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002318 if (*decimal_point == NULL) {
2319 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002320 }
2321
TIGirardif2312032020-10-20 08:39:52 -03002322 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002323 if (*thousands_sep == NULL) {
2324 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002325 }
2326
2327 res = 0;
2328
Victor Stinner02e6bf72018-11-20 16:20:16 +01002329done:
TIGirardif2312032020-10-20 08:39:52 -03002330#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002331 if (loc != NULL) {
2332 setlocale(LC_CTYPE, oldloc);
2333 }
2334 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002335#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002336 return res;
TIGirardif2312032020-10-20 08:39:52 -03002337
2338#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002339}
Kyle Evans79925792020-10-13 15:04:44 -05002340
/* Strategy used to close a range of file descriptors, in order of preference:
 *
 * 1. close_range(2), whenever it is available.  For a contiguous range it
 *    beats fdwalk(3), which has to iterate over the entire fd space and
 *    simply does nothing for the descriptors outside the range.
 * 2. closefrom(2), if available, when the range extends up to
 *    sysconf(_SC_OPEN_MAX).
 *    2a. Fall back to fdwalk(3) when the range does not extend up to
 *        sysconf(_SC_OPEN_MAX); it is more performant if the range happens
 *        to contain a chunk of non-opened fds in the middle.
 *    2b. If fdwalk(3) is not available either, run a plain close(2) loop.
 */
#if defined(__FreeBSD__)
# define USE_CLOSEFROM
#endif /* defined(__FreeBSD__) */

#if defined(HAVE_FDWALK)
# define USE_FDWALK
#endif /* defined(HAVE_FDWALK) */
2359
2360#ifdef USE_FDWALK
/* Callback handed to fdwalk(): close `fd` when it lies in a half-open range.
 *
 * `lohi` points to two ints: lohi[0] is the inclusive lower bound and
 * lohi[1] the exclusive upper bound of the descriptors to close.
 *
 * Returns 1 as soon as `fd` has reached the upper bound, 0 otherwise.
 * NOTE(review): a non-zero return presumably tells fdwalk() to stop
 * iterating -- confirm against fdwalk(3).
 */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int range_start = bounds[0];
    const int range_stop = bounds[1];

    if (fd >= range_stop) {
        /* At or past the end of the range: nothing left to close. */
        return 1;
    }
    if (fd >= range_start) {
        /* Inside the range; errors from close() are deliberately ignored. */
        (void)close(fd);
    }
    return 0;
}
2376#endif /* USE_FDWALK */
2377
2378/* Closes all file descriptors in [first, last], ignoring errors. */
2379void
2380_Py_closerange(int first, int last)
2381{
2382 first = Py_MAX(first, 0);
2383 _Py_BEGIN_SUPPRESS_IPH
2384#ifdef HAVE_CLOSE_RANGE
2385 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2386 /* Any errors encountered while closing file descriptors are ignored;
2387 * ENOSYS means no kernel support, though,
2388 * so we'll fallback to the other methods. */
2389 }
2390 else
2391#endif /* HAVE_CLOSE_RANGE */
2392#ifdef USE_CLOSEFROM
2393 if (last >= sysconf(_SC_OPEN_MAX)) {
2394 /* Any errors encountered while closing file descriptors are ignored */
2395 closefrom(first);
2396 }
2397 else
2398#endif /* USE_CLOSEFROM */
2399#ifdef USE_FDWALK
2400 {
2401 int lohi[2];
2402 lohi[0] = first;
2403 lohi[1] = last + 1;
2404 fdwalk(_fdwalk_close_func, lohi);
2405 }
2406#else
2407 {
2408 for (int i = first; i <= last; i++) {
2409 /* Ignore errors */
2410 (void)close(i);
2411 }
2412 }
2413#endif /* USE_FDWALK */
2414 _Py_END_SUPPRESS_IPH
2415}