blob: e8a7eda505c7ec07c805becea63333ae09ded0cd [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
Jakub Kulík9032cf52021-04-30 15:21:42 +020021#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
22#include <iconv.h>
23#endif
24
Victor Stinnerdaf45552013-08-28 00:53:59 +020025#ifdef HAVE_FCNTL_H
26#include <fcntl.h>
27#endif /* HAVE_FCNTL_H */
28
Victor Stinnerdaf45552013-08-28 00:53:59 +020029#ifdef O_CLOEXEC
/* Tells whether open() honours the O_CLOEXEC flag. Possible values:

   -1: unknown
    0: open() ignores the O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() supports the O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
39#endif
40
// Highest valid code point. The value must be the same in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// Error values returned by mbstowcs() and mbrtowc()
static const size_t DECODE_ERROR = (size_t)-1;
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
47
Victor Stinner3d4226a2018-08-29 22:21:32 +020048
49static int
50get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
51{
52 switch (errors)
53 {
54 case _Py_ERROR_STRICT:
55 *surrogateescape = 0;
56 return 0;
57 case _Py_ERROR_SURROGATEESCAPE:
58 *surrogateescape = 1;
59 return 0;
60 default:
61 return -1;
62 }
63}
64
65
Brett Cannonefb00c02012-02-29 18:31:31 -050066PyObject *
67_Py_device_encoding(int fd)
68{
Steve Dower8fc89802015-04-12 00:26:27 -040069 int valid;
70 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070071 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040072 _Py_END_SUPPRESS_IPH
73 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050074 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040075
Victor Stinner14b9b112013-06-25 00:37:25 +020076#if defined(MS_WINDOWS)
Victor Stinner35297182020-11-04 11:20:10 +010077 UINT cp;
Brett Cannonefb00c02012-02-29 18:31:31 -050078 if (fd == 0)
79 cp = GetConsoleCP();
80 else if (fd == 1 || fd == 2)
81 cp = GetConsoleOutputCP();
82 else
83 cp = 0;
84 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
85 has no console */
Victor Stinner35297182020-11-04 11:20:10 +010086 if (cp == 0) {
87 Py_RETURN_NONE;
Brett Cannonefb00c02012-02-29 18:31:31 -050088 }
Victor Stinner35297182020-11-04 11:20:10 +010089
90 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
91#else
92 return _Py_GetLocaleEncodingObject();
Brett Cannonefb00c02012-02-29 18:31:31 -050093#endif
Brett Cannonefb00c02012-02-29 18:31:31 -050094}
95
Victor Stinner99768342021-03-17 21:46:53 +010096
97static size_t
98is_valid_wide_char(wchar_t ch)
99{
Jakub Kulík9032cf52021-04-30 15:21:42 +0200100#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
101 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
102 for non-Unicode locales, which makes values higher than MAX_UNICODE
103 possibly valid. */
104 return 1;
105#endif
Victor Stinner99768342021-03-17 21:46:53 +0100106 if (Py_UNICODE_IS_SURROGATE(ch)) {
107 // Reject lone surrogate characters
108 return 0;
109 }
110 if (ch > MAX_UNICODE) {
111 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
112 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
113 // it creates characters outside the [U+0000; U+10ffff] range:
114 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
115 return 0;
116 }
117 return 1;
118}
119
120
121static size_t
122_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
123{
124 size_t count = mbstowcs(dest, src, n);
125 if (dest != NULL && count != DECODE_ERROR) {
126 for (size_t i=0; i < count; i++) {
127 wchar_t ch = dest[i];
128 if (!is_valid_wide_char(ch)) {
129 return DECODE_ERROR;
130 }
131 }
132 }
133 return count;
134}
135
136
137#ifdef HAVE_MBRTOWC
138static size_t
139_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
140{
141 assert(pwc != NULL);
142 size_t count = mbrtowc(pwc, str, len, pmbs);
143 if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
144 if (!is_valid_wide_char(*pwc)) {
145 return DECODE_ERROR;
146 }
147 }
148 return count;
149}
150#endif
151
152
Victor Stinnere2510952019-05-02 11:28:57 -0400153#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100154
155#define USE_FORCE_ASCII
156
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100157extern int _Py_normalize_encoding(const char *, char *, size_t);
158
/* Workaround for a locale encoding issue of FreeBSD and OpenIndiana with the
   C locale and the POSIX locale: nl_langinfo(CODESET) announces an alias of
   the ASCII encoding, whereas the mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode()
   use the locale.getpreferredencoding() codec. For example, if command line
   arguments are decoded by mbstowcs() and encoded back by os.fsencode(), we
   get a UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias of ASCII), and
   at least one byte in the range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. The ASCII
   encoding is also forced in this case.

   Values of force_ascii:

    1: the workaround is used: Py_EncodeLocale() uses
       encode_ascii_surrogateescape() and Py_DecodeLocale() uses
       decode_ascii()
    0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
       Py_DecodeLocale() uses mbstowcs()
   -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
187
/* Decide whether the ASCII encoding must be forced instead of relying on
   mbstowcs() and wcstombs().

   Return 1 if the workaround must be used, 0 otherwise.  Any failure
   (LC_CTYPE locale unavailable, empty or unnormalizable codeset, or
   nl_langinfo(CODESET) not available at build time) also returns 1.

   The probe buffers handed to _Py_mbstowcs() are NUL-terminated: the input
   of mbstowcs() is a string, so a decoder that treats the probed byte as the
   start of a longer sequence must not be able to read past the buffer. */
static int
check_force_ascii(void)
{
    const char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        /* error: force the ASCII encoding */
        return 1;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL || codeset[0] == '\0') {
        /* CODESET is not set or empty: force the ASCII encoding */
        return 1;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        /* error: force the ASCII encoding */
        return 1;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        const char probe[2] = {(char)0xA7, '\0'};
        wchar_t wch;

        size_t res = _Py_mbstowcs(&wch, probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs()
               uses Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
    return 0;
#else
    static const char *const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (size_t k = 0; ascii_aliases[k] != NULL; k++) {
        if (strcmp(encoding, ascii_aliases[k]) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int byte = 0x80; byte <= 0xff; byte++) {
        const char probe[2] = {(char)(unsigned char)byte, '\0'};
        wchar_t wch[1];

        if (_Py_mbstowcs(wch, probe, 1) != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#endif /* !defined(__hpux) */
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif /* defined(HAVE_LANGINFO_H) && defined(CODESET) */
}
287
Victor Stinnerd500e532018-08-28 17:27:36 +0200288
289int
290_Py_GetForceASCII(void)
291{
292 if (force_ascii == -1) {
293 force_ascii = check_force_ascii();
294 }
295 return force_ascii;
296}
297
298
Victor Stinner353933e2018-11-23 13:08:26 +0100299void
300_Py_ResetForceASCII(void)
301{
302 force_ascii = -1;
303}
304
305
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100306static int
307encode_ascii(const wchar_t *text, char **str,
308 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200309 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100310{
311 char *result = NULL, *out;
312 size_t len, i;
313 wchar_t ch;
314
Victor Stinner3d4226a2018-08-29 22:21:32 +0200315 int surrogateescape;
316 if (get_surrogateescape(errors, &surrogateescape) < 0) {
317 return -3;
318 }
319
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100320 len = wcslen(text);
321
Victor Stinner9bee3292017-12-21 16:49:13 +0100322 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100323 if (raw_malloc) {
324 result = PyMem_RawMalloc(len + 1);
325 }
326 else {
327 result = PyMem_Malloc(len + 1);
328 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100329 if (result == NULL) {
330 return -1;
331 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100332
333 out = result;
334 for (i=0; i<len; i++) {
335 ch = text[i];
336
337 if (ch <= 0x7f) {
338 /* ASCII character */
339 *out++ = (char)ch;
340 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100341 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100342 /* UTF-8b surrogate */
343 *out++ = (char)(ch - 0xdc00);
344 }
345 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100346 if (raw_malloc) {
347 PyMem_RawFree(result);
348 }
349 else {
350 PyMem_Free(result);
351 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100352 if (error_pos != NULL) {
353 *error_pos = i;
354 }
355 if (reason) {
356 *reason = "encoding error";
357 }
358 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100359 }
360 }
361 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100362 *str = result;
363 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100364}
Victor Stinnerd500e532018-08-28 17:27:36 +0200365#else
/* Variant used when the force-ASCII workaround is compiled out:
   the ASCII encoding is never forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100371
/* Variant used when the force-ASCII workaround is compiled out:
   there is no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
Victor Stinnere2510952019-05-02 11:28:57 -0400377#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100378
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100379
380#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
381static int
382decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200383 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100384{
385 wchar_t *res;
386 unsigned char *in;
387 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600388 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100389
Victor Stinner3d4226a2018-08-29 22:21:32 +0200390 int surrogateescape;
391 if (get_surrogateescape(errors, &surrogateescape) < 0) {
392 return -3;
393 }
394
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100395 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
396 return -1;
397 }
398 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
399 if (!res) {
400 return -1;
401 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100402
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100403 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100404 for (in = (unsigned char*)arg; *in; in++) {
405 unsigned char ch = *in;
406 if (ch < 128) {
407 *out++ = ch;
408 }
409 else {
410 if (!surrogateescape) {
411 PyMem_RawFree(res);
412 if (wlen) {
413 *wlen = in - (unsigned char*)arg;
414 }
415 if (reason) {
416 *reason = "decoding error";
417 }
418 return -2;
419 }
420 *out++ = 0xdc00 + ch;
421 }
422 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100423 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100424
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100425 if (wlen != NULL) {
426 *wlen = out - res;
427 }
428 *wstr = res;
429 return 0;
430}
431#endif /* !HAVE_MBRTOWC */
432
433static int
434decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200435 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000436{
437 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100438 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000439 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200440#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000441 unsigned char *in;
442 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000443 mbstate_t mbs;
444#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100445
Victor Stinner3d4226a2018-08-29 22:21:32 +0200446 int surrogateescape;
447 if (get_surrogateescape(errors, &surrogateescape) < 0) {
448 return -3;
449 }
450
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100451#ifdef HAVE_BROKEN_MBSTOWCS
452 /* Some platforms have a broken implementation of
453 * mbstowcs which does not count the characters that
454 * would result from conversion. Use an upper bound.
455 */
456 argsize = strlen(arg);
457#else
Victor Stinner99768342021-03-17 21:46:53 +0100458 argsize = _Py_mbstowcs(NULL, arg, 0);
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100459#endif
Victor Stinner99768342021-03-17 21:46:53 +0100460 if (argsize != DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100461 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
462 return -1;
463 }
464 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
465 if (!res) {
466 return -1;
467 }
468
Victor Stinner99768342021-03-17 21:46:53 +0100469 count = _Py_mbstowcs(res, arg, argsize + 1);
470 if (count != DECODE_ERROR) {
471 *wstr = res;
472 if (wlen != NULL) {
473 *wlen = count;
Victor Stinner168e1172010-10-16 23:16:16 +0000474 }
Victor Stinner99768342021-03-17 21:46:53 +0100475 return 0;
Victor Stinner4e314432010-10-07 21:45:39 +0000476 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200477 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000478 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100479
Victor Stinner4e314432010-10-07 21:45:39 +0000480 /* Conversion failed. Fall back to escaping with surrogateescape. */
481#ifdef HAVE_MBRTOWC
482 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
483
484 /* Overallocate; as multi-byte characters are in the argument, the
485 actual output could use less memory. */
486 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100487 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
488 return -1;
489 }
490 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
491 if (!res) {
492 return -1;
493 }
494
Victor Stinner4e314432010-10-07 21:45:39 +0000495 in = (unsigned char*)arg;
496 out = res;
497 memset(&mbs, 0, sizeof mbs);
498 while (argsize) {
Victor Stinner99768342021-03-17 21:46:53 +0100499 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100500 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000501 /* Reached end of string; null char stored. */
502 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100503 }
504
Victor Stinner99768342021-03-17 21:46:53 +0100505 if (converted == INCOMPLETE_CHARACTER) {
Victor Stinner4e314432010-10-07 21:45:39 +0000506 /* Incomplete character. This should never happen,
507 since we provide everything that we have -
508 unless there is a bug in the C library, or I
509 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100510 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000511 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100512
Victor Stinner99768342021-03-17 21:46:53 +0100513 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100514 if (!surrogateescape) {
515 goto decode_error;
516 }
517
Victor Stinner99768342021-03-17 21:46:53 +0100518 /* Decoding error. Escape as UTF-8b, and start over in the initial
519 shift state. */
Victor Stinner4e314432010-10-07 21:45:39 +0000520 *out++ = 0xdc00 + *in++;
521 argsize--;
522 memset(&mbs, 0, sizeof mbs);
523 continue;
524 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100525
Victor Stinner99768342021-03-17 21:46:53 +0100526 // _Py_mbrtowc() reject lone surrogate characters
527 assert(!Py_UNICODE_IS_SURROGATE(*out));
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100528
Victor Stinner4e314432010-10-07 21:45:39 +0000529 /* successfully converted some bytes */
530 in += converted;
531 argsize -= converted;
532 out++;
533 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100534 if (wlen != NULL) {
535 *wlen = out - res;
536 }
537 *wstr = res;
538 return 0;
539
540decode_error:
541 PyMem_RawFree(res);
542 if (wlen) {
543 *wlen = in - (unsigned char*)arg;
544 }
545 if (reason) {
546 *reason = "decoding error";
547 }
548 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100549#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000550 /* Cannot use C locale for escaping; manually escape as if charset
551 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
552 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200553 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100554#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100555}
556
557
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100558/* Decode a byte string from the locale encoding.
559
560 Use the strict error handler if 'surrogateescape' is zero. Use the
561 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
562 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
563 can be decoded as a surrogate character, escape the bytes using the
564 surrogateescape error handler instead of decoding them.
565
Ville Skyttä61f82e02018-04-20 23:08:45 +0300566 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100567 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
568 the number of wide characters excluding the null character into *wlen.
569
570 On memory allocation failure, return -1.
571
572 On decoding error, return -2. If wlen is not NULL, write the start of
573 invalid byte sequence in the input string into *wlen. If reason is not NULL,
574 write the decoding error message into *reason.
575
Victor Stinner3d4226a2018-08-29 22:21:32 +0200576 Return -3 if the error handler 'errors' is not supported.
577
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100578 Use the Py_EncodeLocaleEx() function to encode the character string back to
579 a byte string. */
580int
581_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
582 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200583 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100584{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100585 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400586#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100587 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200588 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100589#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200590 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100591#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100592 }
593
Victor Stinnere2510952019-05-02 11:28:57 -0400594#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100595 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200596 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100597#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200598 int use_utf8 = (Py_UTF8Mode == 1);
599#ifdef MS_WINDOWS
600 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
601#endif
602 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200603 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
604 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100605 }
606
607#ifdef USE_FORCE_ASCII
608 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100609 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100610 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100611
612 if (force_ascii) {
613 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200614 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100615 }
616#endif
617
Victor Stinner3d4226a2018-08-29 22:21:32 +0200618 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400619#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100620}
621
622
Victor Stinner91106cd2017-12-13 12:29:09 +0100623/* Decode a byte string from the locale encoding with the
624 surrogateescape error handler: undecodable bytes are decoded as characters
625 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
626 character, escape the bytes using the surrogateescape error handler instead
627 of decoding them.
628
629 Return a pointer to a newly allocated wide character string, use
630 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
631 wide characters excluding the null character into *size
632
633 Return NULL on decoding error or memory allocation error. If *size* is not
634 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
635 decoding error.
636
637 Decoding errors should never happen, unless there is a bug in the C
638 library.
639
640 Use the Py_EncodeLocale() function to encode the character string back to a
641 byte string. */
642wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100643Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100644{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100645 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200646 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
647 NULL, 0,
648 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100649 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200650 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100651 if (wlen != NULL) {
652 *wlen = (size_t)res;
653 }
654 return NULL;
655 }
656 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100657}
Victor Stinner91106cd2017-12-13 12:29:09 +0100658
Victor Stinner91106cd2017-12-13 12:29:09 +0100659
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100660static int
661encode_current_locale(const wchar_t *text, char **str,
662 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200663 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100664{
Victor Stinner4e314432010-10-07 21:45:39 +0000665 const size_t len = wcslen(text);
666 char *result = NULL, *bytes = NULL;
667 size_t i, size, converted;
668 wchar_t c, buf[2];
669
Victor Stinner3d4226a2018-08-29 22:21:32 +0200670 int surrogateescape;
671 if (get_surrogateescape(errors, &surrogateescape) < 0) {
672 return -3;
673 }
674
Victor Stinner4e314432010-10-07 21:45:39 +0000675 /* The function works in two steps:
676 1. compute the length of the output buffer in bytes (size)
677 2. outputs the bytes */
678 size = 0;
679 buf[1] = 0;
680 while (1) {
681 for (i=0; i < len; i++) {
682 c = text[i];
683 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100684 if (!surrogateescape) {
685 goto encode_error;
686 }
Victor Stinner4e314432010-10-07 21:45:39 +0000687 /* UTF-8b surrogate */
688 if (bytes != NULL) {
689 *bytes++ = c - 0xdc00;
690 size--;
691 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100692 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000693 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100694 }
Victor Stinner4e314432010-10-07 21:45:39 +0000695 continue;
696 }
697 else {
698 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100699 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000700 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100701 }
702 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000703 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100704 }
Victor Stinner99768342021-03-17 21:46:53 +0100705 if (converted == DECODE_ERROR) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100706 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000707 }
708 if (bytes != NULL) {
709 bytes += converted;
710 size -= converted;
711 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100712 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000713 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100714 }
Victor Stinner4e314432010-10-07 21:45:39 +0000715 }
716 }
717 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100718 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000719 break;
720 }
721
722 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100723 if (raw_malloc) {
724 result = PyMem_RawMalloc(size);
725 }
726 else {
727 result = PyMem_Malloc(size);
728 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100729 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100730 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100731 }
Victor Stinner4e314432010-10-07 21:45:39 +0000732 bytes = result;
733 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100734 *str = result;
735 return 0;
736
737encode_error:
738 if (raw_malloc) {
739 PyMem_RawFree(result);
740 }
741 else {
742 PyMem_Free(result);
743 }
744 if (error_pos != NULL) {
745 *error_pos = i;
746 }
747 if (reason) {
748 *reason = "encoding error";
749 }
750 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100751}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100752
Victor Stinner3d4226a2018-08-29 22:21:32 +0200753
754/* Encode a string to the locale encoding.
755
756 Parameters:
757
758 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
759 of PyMem_Malloc().
760 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
761 Python filesystem encoding.
762 * errors: error handler like "strict" or "surrogateescape".
763
764 Return value:
765
766 0: success, *str is set to a newly allocated decoded string.
767 -1: memory allocation failure
768 -2: encoding error, set *error_pos and *reason (if set).
769 -3: the error handler 'errors' is not supported.
770 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100771static int
772encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
773 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200774 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100775{
776 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400777#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100778 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200779 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100780#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100781 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200782 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100783#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100784 }
785
Victor Stinnere2510952019-05-02 11:28:57 -0400786#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100787 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200788 raw_malloc, errors);
789#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200790 int use_utf8 = (Py_UTF8Mode == 1);
791#ifdef MS_WINDOWS
792 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
793#endif
794 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100795 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200796 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100797 }
798
799#ifdef USE_FORCE_ASCII
800 if (force_ascii == -1) {
801 force_ascii = check_force_ascii();
802 }
803
804 if (force_ascii) {
805 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200806 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100807 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100808#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100809
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100810 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200811 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400812#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100813}
814
Victor Stinner9dd76202017-12-21 16:20:32 +0100815static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100816encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100817 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100818{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100819 char *str;
820 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200821 raw_malloc, current_locale,
822 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100823 if (res != -2 && error_pos) {
824 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100825 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100826 if (res != 0) {
827 return NULL;
828 }
829 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100830}
831
/* Encode a wide character string with the surrogateescape error handler:
   surrogate characters in the range U+DC80..U+DCFF are converted to bytes
   0x80..0xFF. The encoding is the one encode_locale_ex() selects when
   current_locale is 0 (the Python filesystem encoding).

   Return a pointer to a newly allocated byte string; release it with
   PyMem_Free(). Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or
   to the index of the invalid character on encoding error.

   Py_DecodeLocale() performs the reverse conversion, from the byte string
   back to a wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;      /* allocate with PyMem_Malloc() */
    const int current_locale = 0;  /* use the filesystem encoding */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100849
Victor Stinner91106cd2017-12-13 12:29:09 +0100850
/* Same as Py_EncodeLocale(), except that the returned string is allocated
   with the raw allocator: release it with PyMem_RawFree() rather than
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;      /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;  /* use the filesystem encoding */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
858
859
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100860int
861_Py_EncodeLocaleEx(const wchar_t *text, char **str,
862 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200863 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100864{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100865 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200866 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000867}
868
Victor Stinner6672d0c2010-10-07 22:53:43 +0000869
Victor Stinner82458b62020-11-01 20:59:35 +0100870// Get the current locale encoding name:
871//
872// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
873// - Return "UTF-8" if the UTF-8 Mode is enabled
874// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100875// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100876// - Otherwise, return nl_langinfo(CODESET).
877//
Victor Stinnere662c392020-11-01 23:07:23 +0100878// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100879//
Victor Stinner710e8262020-10-31 01:02:09 +0100880// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100881wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100882_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100883{
884#ifdef _Py_FORCE_UTF8_LOCALE
885 // On Android langinfo.h and CODESET are missing,
886 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100887 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100888#else
889 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
890 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100891 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100892 }
893
Victor Stinner82458b62020-11-01 20:59:35 +0100894#ifdef MS_WINDOWS
895 wchar_t encoding[23];
896 unsigned int ansi_codepage = GetACP();
897 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
898 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
899 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100900#else
901 const char *encoding = nl_langinfo(CODESET);
902 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100903 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
904 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100905 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100906 }
Victor Stinner710e8262020-10-31 01:02:09 +0100907
Victor Stinner82458b62020-11-01 20:59:35 +0100908 wchar_t *wstr;
909 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100910 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100911 if (res < 0) {
912 return NULL;
913 }
914 return wstr;
915#endif // !MS_WINDOWS
916
917#endif // !_Py_FORCE_UTF8_LOCALE
918}
919
920
921PyObject *
922_Py_GetLocaleEncodingObject(void)
923{
Victor Stinnere662c392020-11-01 23:07:23 +0100924 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100925 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100926 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100927 return NULL;
928 }
929
930 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
931 PyMem_RawFree(encoding);
932 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100933}
934
Jakub Kulík9032cf52021-04-30 15:21:42 +0200935#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
936
/* Tell whether the current locale uses a non-Unicode internal wchar_t form.

   Return 1 if nl_langinfo(CODESET) names something other than "UTF-8" or
   "646", 0 otherwise (including when no codeset name is available). */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *charset = nl_langinfo(CODESET);

    if (charset == NULL) {
        return 0;
    }
    if (strcmp(charset, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII
       encoding */
    if (strcmp(charset, "646") == 0) {
        return 0;
    }
    return 1;
}
950
951static wchar_t *
952_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
953 const char *tocode, const char *fromcode)
954{
955 Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
956
957 /* Ensure we won't overflow the size. */
958 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
959 PyErr_NoMemory();
960 return NULL;
961 }
962
963 /* the string doesn't have to be NULL terminated */
964 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
965 if (target == NULL) {
966 PyErr_NoMemory();
967 return NULL;
968 }
969
970 iconv_t cd = iconv_open(tocode, fromcode);
971 if (cd == (iconv_t)-1) {
972 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
973 PyMem_Free(target);
974 return NULL;
975 }
976
977 char *inbuf = (char *) source;
978 char *outbuf = (char *) target;
979 size_t inbytesleft = sizeof(wchar_t) * size;
980 size_t outbytesleft = inbytesleft;
981
982 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
983 if (ret == DECODE_ERROR) {
984 PyErr_Format(PyExc_ValueError, "iconv() failed");
985 PyMem_Free(target);
986 iconv_close(cd);
987 return NULL;
988 }
989
990 iconv_close(cd);
991 return target;
992}
993
994/* Convert a wide character string to the UCS-4 encoded string. This
995 is necessary on systems where internal form of wchar_t are not Unicode
996 code points (e.g. Oracle Solaris).
997
998 Return a pointer to a newly allocated string, use PyMem_Free() to free
999 the memory. Return NULL and raise exception on conversion or memory
1000 allocation error. */
1001wchar_t *
1002_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1003{
1004 return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1005}
1006
1007/* Convert a UCS-4 encoded string to native wide character string. This
1008 is necessary on systems where internal form of wchar_t are not Unicode
1009 code points (e.g. Oracle Solaris).
1010
1011 The conversion is done in place. This can be done because both wchar_t
1012 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1013 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1014 which is currently the only system using these functions; it doesn't have
1015 to be for other systems).
1016
1017 Return 0 on success. Return -1 and raise exception on conversion
1018 or memory allocation error. */
1019int
1020_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1021{
1022 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1023 if (!result) {
1024 return -1;
1025 }
1026 memcpy(unicode, result, size * sizeof(wchar_t));
1027 PyMem_Free(result);
1028 return 0;
1029}
1030#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
Victor Stinner710e8262020-10-31 01:02:09 +01001031
Steve Dowerf2f373f2015-02-21 08:44:05 -08001032#ifdef MS_WINDOWS
1033static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1034
1035static void
1036FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1037{
1038 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1039 /* Cannot simply cast and dereference in_ptr,
1040 since it might not be aligned properly */
1041 __int64 in;
1042 memcpy(&in, in_ptr, sizeof(in));
1043 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1044 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1045}
1046
1047void
Steve Dowerbf1f3762015-02-21 15:26:02 -08001048_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -08001049{
1050 /* XXX endianness */
1051 __int64 out;
1052 out = time_in + secs_between_epochs;
1053 out = out * 10000000 + nsec_in / 100;
1054 memcpy(out_ptr, &out, sizeof(out));
1055}
1056
1057/* Below, we *know* that ugo+r is 0444 */
1058#if _S_IREAD != 0400
1059#error Unsupported C library
1060#endif
1061static int
1062attributes_to_mode(DWORD attr)
1063{
1064 int m = 0;
1065 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1066 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1067 else
1068 m |= _S_IFREG;
1069 if (attr & FILE_ATTRIBUTE_READONLY)
1070 m |= 0444;
1071 else
1072 m |= 0666;
1073 return m;
1074}
1075
Steve Dowerbf1f3762015-02-21 15:26:02 -08001076void
Victor Stinnere134a7f2015-03-30 10:09:31 +02001077_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1078 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -08001079{
1080 memset(result, 0, sizeof(*result));
1081 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1082 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1083 result->st_dev = info->dwVolumeSerialNumber;
1084 result->st_rdev = result->st_dev;
1085 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1086 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1087 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1088 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +01001089 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -07001090 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1091 open other name surrogate reparse points without traversing them. To
1092 detect/handle these, check st_file_attributes and st_reparse_tag. */
1093 result->st_reparse_tag = reparse_tag;
1094 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1095 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -08001096 /* first clear the S_IFMT bits */
1097 result->st_mode ^= (result->st_mode & S_IFMT);
1098 /* now set the bits that make this a symlink */
1099 result->st_mode |= S_IFLNK;
1100 }
1101 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -08001102}
1103#endif
1104
/* Return information about an open file, without raising an exception.

   On POSIX, this is a plain call to fstat().

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   because they support files larger than 2 GiB. fstat() may fail with
   EOVERFLOW on such files because the file size type is a signed 32-bit
   integer: see issue #23152.

   Fill *status and return 0 on success. Return nonzero on error: on
   Windows the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifndef MS_WINDOWS
    return fstat(fd, status);
#else
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    const int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        /* FILE_TYPE_UNKNOWN is only an error if a Windows error code is
           set; otherwise it is a valid but unknown file. */
        const DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
    }

    /* Only disk files carry full information. For other types report at
       most the file type and leave the rest of *status zeroed. */
    switch (file_type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001168
Victor Stinnere134a7f2015-03-30 10:09:31 +02001169/* Return information about a file.
1170
1171 On POSIX, use fstat().
1172
1173 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001174 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1175 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001176 #23152.
1177
1178 Raise an exception and return -1 on error. On Windows, set the last Windows
1179 error on error. On POSIX, set errno on error. Fill status and return 0 on
1180 success.
1181
Victor Stinner6f4fae82015-04-01 18:34:32 +02001182 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1183 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001184int
1185_Py_fstat(int fd, struct _Py_stat_struct *status)
1186{
1187 int res;
1188
Victor Stinner8a1be612016-03-14 22:07:55 +01001189 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001190
Victor Stinnere134a7f2015-03-30 10:09:31 +02001191 Py_BEGIN_ALLOW_THREADS
1192 res = _Py_fstat_noraise(fd, status);
1193 Py_END_ALLOW_THREADS
1194
1195 if (res != 0) {
1196#ifdef MS_WINDOWS
1197 PyErr_SetFromWindowsErr(0);
1198#else
1199 PyErr_SetFromErrno(PyExc_OSError);
1200#endif
1201 return -1;
1202 }
1203 return 0;
1204}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001205
Victor Stinner6672d0c2010-10-07 22:53:43 +00001206/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1207 call stat() otherwise. Only fill st_mode attribute on Windows.
1208
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001209 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1210 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001211
1212int
Victor Stinnera4a75952010-10-07 22:23:10 +00001213_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001214{
1215#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001216 int err;
1217 struct _stat wstatbuf;
1218
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001219#if USE_UNICODE_WCHAR_CACHE
1220 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1221#else /* USE_UNICODE_WCHAR_CACHE */
1222 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1223#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001224 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001225 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001226
Victor Stinneree587ea2011-11-17 00:51:38 +01001227 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001228 if (!err)
1229 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001230#if !USE_UNICODE_WCHAR_CACHE
1231 PyMem_Free(wpath);
1232#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001233 return err;
1234#else
1235 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001236 PyObject *bytes;
1237 char *cpath;
1238
1239 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001240 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001241 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001242
1243 /* check for embedded null bytes */
1244 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1245 Py_DECREF(bytes);
1246 return -2;
1247 }
1248
1249 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001250 Py_DECREF(bytes);
1251 return ret;
1252#endif
1253}
1254
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001255
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001256/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001257static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001258get_inheritable(int fd, int raise)
1259{
1260#ifdef MS_WINDOWS
1261 HANDLE handle;
1262 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001263
Segev Finer5e437fb2021-04-24 01:00:27 +03001264 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001265 if (handle == INVALID_HANDLE_VALUE) {
1266 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001267 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001268 return -1;
1269 }
1270
1271 if (!GetHandleInformation(handle, &flags)) {
1272 if (raise)
1273 PyErr_SetFromWindowsErr(0);
1274 return -1;
1275 }
1276
1277 return (flags & HANDLE_FLAG_INHERIT);
1278#else
1279 int flags;
1280
1281 flags = fcntl(fd, F_GETFD, 0);
1282 if (flags == -1) {
1283 if (raise)
1284 PyErr_SetFromErrno(PyExc_OSError);
1285 return -1;
1286 }
1287 return !(flags & FD_CLOEXEC);
1288#endif
1289}
1290
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;

    return get_inheritable(fd, raise);
}
1299
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001300
1301/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001302static int
1303set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1304{
1305#ifdef MS_WINDOWS
1306 HANDLE handle;
1307 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001308#else
1309#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1310 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001311 int request;
1312 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001313#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001314 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001315 int res;
1316#endif
1317
1318 /* atomic_flag_works can only be used to make the file descriptor
1319 non-inheritable */
1320 assert(!(atomic_flag_works != NULL && inheritable));
1321
1322 if (atomic_flag_works != NULL && !inheritable) {
1323 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001324 int isInheritable = get_inheritable(fd, raise);
1325 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001326 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001327 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001328 }
1329
1330 if (*atomic_flag_works)
1331 return 0;
1332 }
1333
1334#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03001335 handle = _Py_get_osfhandle_noraise(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001336 if (handle == INVALID_HANDLE_VALUE) {
1337 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001338 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001339 return -1;
1340 }
1341
1342 if (inheritable)
1343 flags = HANDLE_FLAG_INHERIT;
1344 else
1345 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001346
1347 /* This check can be removed once support for Windows 7 ends. */
1348#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1349 GetFileType(handle) == FILE_TYPE_CHAR)
1350
1351 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1352 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001353 if (raise)
1354 PyErr_SetFromWindowsErr(0);
1355 return -1;
1356 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001357#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001358 return 0;
1359
Victor Stinnerdaf45552013-08-28 00:53:59 +02001360#else
Victor Stinner282124b2014-09-02 11:41:04 +02001361
1362#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001363 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001364 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001365 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1366 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001367 if (inheritable)
1368 request = FIONCLEX;
1369 else
1370 request = FIOCLEX;
1371 err = ioctl(fd, request, NULL);
1372 if (!err) {
1373 ioctl_works = 1;
1374 return 0;
1375 }
1376
Miss Islington (bot)2ae22352021-08-06 06:40:44 -07001377#ifdef O_PATH
cptpcrd7dc71c42021-01-20 09:05:51 -05001378 if (errno == EBADF) {
Miss Islington (bot)2ae22352021-08-06 06:40:44 -07001379 // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1380 // on O_PATH file descriptors. Fall through to the fcntl()
1381 // implementation.
cptpcrd7dc71c42021-01-20 09:05:51 -05001382 }
1383 else
1384#endif
Victor Stinner3116cc42016-05-19 16:46:18 +02001385 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001386 if (raise)
1387 PyErr_SetFromErrno(PyExc_OSError);
1388 return -1;
1389 }
1390 else {
1391 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1392 device". The ioctl is declared but not supported by the kernel.
1393 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001394 Illumos-based OS for example.
1395
1396 Issue #27057: When SELinux policy disallows ioctl it will fail
1397 with EACCES. While FIOCLEX is safe operation it may be
1398 unavailable because ioctl was denied altogether.
1399 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001400 ioctl_works = 0;
1401 }
1402 /* fallback to fcntl() if ioctl() does not work */
1403 }
1404#endif
1405
1406 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001407 flags = fcntl(fd, F_GETFD);
1408 if (flags < 0) {
1409 if (raise)
1410 PyErr_SetFromErrno(PyExc_OSError);
1411 return -1;
1412 }
1413
Victor Stinnera858bbd2016-04-17 16:51:52 +02001414 if (inheritable) {
1415 new_flags = flags & ~FD_CLOEXEC;
1416 }
1417 else {
1418 new_flags = flags | FD_CLOEXEC;
1419 }
1420
1421 if (new_flags == flags) {
1422 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1423 return 0;
1424 }
1425
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001426 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001427 if (res < 0) {
1428 if (raise)
1429 PyErr_SetFromErrno(PyExc_OSError);
1430 return -1;
1431 }
1432 return 0;
1433#endif
1434}
1435
/* Make the file descriptor non-inheritable, without raising an exception
   and without using an atomic flag cache.

   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;

    return set_inheritable(fd, inheritable, raise, NULL);
}
1443
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the inheritable flag of the file
      descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set it to 0
      and make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;

    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1465
/* Variant of _Py_set_inheritable() that never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;

    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1474
Victor Stinnera555cfc2015-03-18 00:22:14 +01001475static int
1476_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001477{
1478 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001479 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001480#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001481 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001482#endif
1483
1484#ifdef MS_WINDOWS
1485 flags |= O_NOINHERIT;
1486#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001487 atomic_flag_works = &_Py_open_cloexec_works;
1488 flags |= O_CLOEXEC;
1489#else
1490 atomic_flag_works = NULL;
1491#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001492
Victor Stinnera555cfc2015-03-18 00:22:14 +01001493 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001494 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1495 if (pathname_obj == NULL) {
1496 return -1;
1497 }
1498 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1499 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001500 return -1;
1501 }
1502
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001503 do {
1504 Py_BEGIN_ALLOW_THREADS
1505 fd = open(pathname, flags);
1506 Py_END_ALLOW_THREADS
1507 } while (fd < 0
1508 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001509 if (async_err) {
1510 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001511 return -1;
1512 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001513 if (fd < 0) {
1514 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1515 Py_DECREF(pathname_obj);
1516 return -1;
1517 }
1518 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001519 }
1520 else {
1521 fd = open(pathname, flags);
1522 if (fd < 0)
1523 return -1;
1524 }
1525
1526#ifndef MS_WINDOWS
1527 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001528 close(fd);
1529 return -1;
1530 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001531#endif
1532
Victor Stinnerdaf45552013-08-28 00:53:59 +02001533 return fd;
1534}
1535
/* Open a file with the specified flags (wrapper to the open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1553
/* Wrapper around open() which never raises a Python exception.

   Return the new file descriptor on success. On error, set errno and
   return -1.

   The returned file descriptor is non-inheritable.

   A call interrupted by a signal fails with EINTR (it is not retried). */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1565
Victor Stinnerdaf45552013-08-28 00:53:59 +02001566/* Open a file. Use _wfopen() on Windows, encode the path to the locale
Victor Stinnere42ccd22015-03-18 01:39:23 +01001567 encoding and use fopen() otherwise.
1568
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001569 The file descriptor is created non-inheritable.
1570
1571 If interrupted by a signal, fail with EINTR. */
Victor Stinner4e314432010-10-07 21:45:39 +00001572FILE *
1573_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1574{
Victor Stinner4e314432010-10-07 21:45:39 +00001575 FILE *f;
Steve Dowerb82e17e2019-05-23 08:45:22 -07001576 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1577 return NULL;
1578 }
Victor Stinnerdaf45552013-08-28 00:53:59 +02001579#ifndef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001580 char *cpath;
1581 char cmode[10];
1582 size_t r;
1583 r = wcstombs(cmode, mode, 10);
Victor Stinner99768342021-03-17 21:46:53 +01001584 if (r == DECODE_ERROR || r >= 10) {
Victor Stinner4e314432010-10-07 21:45:39 +00001585 errno = EINVAL;
1586 return NULL;
1587 }
Victor Stinner9dd76202017-12-21 16:20:32 +01001588 cpath = _Py_EncodeLocaleRaw(path, NULL);
1589 if (cpath == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001590 return NULL;
Victor Stinner9dd76202017-12-21 16:20:32 +01001591 }
Victor Stinner4e314432010-10-07 21:45:39 +00001592 f = fopen(cpath, cmode);
Victor Stinner9dd76202017-12-21 16:20:32 +01001593 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001594#else
Victor Stinnerdaf45552013-08-28 00:53:59 +02001595 f = _wfopen(path, mode);
Victor Stinner4e314432010-10-07 21:45:39 +00001596#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001597 if (f == NULL)
1598 return NULL;
1599 if (make_non_inheritable(fileno(f)) < 0) {
1600 fclose(f);
1601 return NULL;
1602 }
1603 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001604}
1605
Victor Stinnerdaf45552013-08-28 00:53:59 +02001606
1607/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001608 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001609
Victor Stinnere42ccd22015-03-18 01:39:23 +01001610 Return the new file object on success. Raise an exception and return NULL
1611 on error.
1612
1613 The file descriptor is created non-inheritable.
1614
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001615 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1616 except if the Python signal handler raises an exception.
1617
Victor Stinner6f4fae82015-04-01 18:34:32 +02001618 Release the GIL to call _wfopen() or fopen(). The caller must hold
1619 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001620FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001621_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001622{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001623 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001624 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001625#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001626 wchar_t wmode[10];
1627 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001628
Victor Stinnere42ccd22015-03-18 01:39:23 +01001629 assert(PyGILState_Check());
1630
Steve Dowerb82e17e2019-05-23 08:45:22 -07001631 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1632 return NULL;
1633 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001634 if (!PyUnicode_Check(path)) {
1635 PyErr_Format(PyExc_TypeError,
1636 "str file path expected under Windows, got %R",
1637 Py_TYPE(path));
1638 return NULL;
1639 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001640#if USE_UNICODE_WCHAR_CACHE
1641 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1642#else /* USE_UNICODE_WCHAR_CACHE */
1643 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1644#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001645 if (wpath == NULL)
1646 return NULL;
1647
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001648 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1649 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001650 if (usize == 0) {
1651 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001652#if !USE_UNICODE_WCHAR_CACHE
1653 PyMem_Free(wpath);
1654#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001655 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001656 }
Victor Stinner4e314432010-10-07 21:45:39 +00001657
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001658 do {
1659 Py_BEGIN_ALLOW_THREADS
1660 f = _wfopen(wpath, wmode);
1661 Py_END_ALLOW_THREADS
1662 } while (f == NULL
1663 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001664#if !USE_UNICODE_WCHAR_CACHE
1665 PyMem_Free(wpath);
1666#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001667#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001668 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001669 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001670
1671 assert(PyGILState_Check());
1672
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001673 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001674 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001675 path_bytes = PyBytes_AS_STRING(bytes);
1676
Steve Dowerb82e17e2019-05-23 08:45:22 -07001677 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001678 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001679 return NULL;
1680 }
1681
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001682 do {
1683 Py_BEGIN_ALLOW_THREADS
1684 f = fopen(path_bytes, mode);
1685 Py_END_ALLOW_THREADS
1686 } while (f == NULL
1687 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001688
Victor Stinner4e314432010-10-07 21:45:39 +00001689 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001690#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001691 if (async_err)
1692 return NULL;
1693
Victor Stinnere42ccd22015-03-18 01:39:23 +01001694 if (f == NULL) {
1695 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001696 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001697 }
1698
1699 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001700 fclose(f);
1701 return NULL;
1702 }
1703 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001704}
1705
Victor Stinner66aab0c2015-03-19 22:53:20 +01001706/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001707
1708 On success, return the number of read bytes, it can be lower than count.
1709 If the current file offset is at or past the end of file, no bytes are read,
1710 and read() returns zero.
1711
1712 On error, raise an exception, set errno and return -1.
1713
1714 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1715 If the Python signal handler raises an exception, the function returns -1
1716 (the syscall is not retried).
1717
1718 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001719Py_ssize_t
1720_Py_read(int fd, void *buf, size_t count)
1721{
1722 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001723 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001724 int async_err = 0;
1725
Victor Stinner8a1be612016-03-14 22:07:55 +01001726 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001727
Victor Stinner66aab0c2015-03-19 22:53:20 +01001728 /* _Py_read() must not be called with an exception set, otherwise the
1729 * caller may think that read() was interrupted by a signal and the signal
1730 * handler raised an exception. */
1731 assert(!PyErr_Occurred());
1732
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001733 if (count > _PY_READ_MAX) {
1734 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001735 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001736
Steve Dower8fc89802015-04-12 00:26:27 -04001737 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001738 do {
1739 Py_BEGIN_ALLOW_THREADS
1740 errno = 0;
1741#ifdef MS_WINDOWS
1742 n = read(fd, buf, (int)count);
1743#else
1744 n = read(fd, buf, count);
1745#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001746 /* save/restore errno because PyErr_CheckSignals()
1747 * and PyErr_SetFromErrno() can modify it */
1748 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001749 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001750 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001751 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001752 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001753
1754 if (async_err) {
1755 /* read() was interrupted by a signal (failed with EINTR)
1756 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001757 errno = err;
1758 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001759 return -1;
1760 }
1761 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001762 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001763 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001764 return -1;
1765 }
1766
1767 return n;
1768}
1769
Victor Stinner82c3e452015-04-01 18:34:45 +02001770static Py_ssize_t
1771_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001772{
1773 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001774 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001775 int async_err = 0;
1776
Steve Dower8fc89802015-04-12 00:26:27 -04001777 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001778#ifdef MS_WINDOWS
1779 if (count > 32767 && isatty(fd)) {
1780 /* Issue #11395: the Windows console returns an error (12: not
1781 enough space error) on writing into stdout if stdout mode is
1782 binary and the length is greater than 66,000 bytes (or less,
1783 depending on heap usage). */
1784 count = 32767;
1785 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001786#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001787 if (count > _PY_WRITE_MAX) {
1788 count = _PY_WRITE_MAX;
1789 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001790
Victor Stinner82c3e452015-04-01 18:34:45 +02001791 if (gil_held) {
1792 do {
1793 Py_BEGIN_ALLOW_THREADS
1794 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001795#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001796 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001797#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001798 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001799#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001800 /* save/restore errno because PyErr_CheckSignals()
1801 * and PyErr_SetFromErrno() can modify it */
1802 err = errno;
1803 Py_END_ALLOW_THREADS
1804 } while (n < 0 && err == EINTR &&
1805 !(async_err = PyErr_CheckSignals()));
1806 }
1807 else {
1808 do {
1809 errno = 0;
1810#ifdef MS_WINDOWS
1811 n = write(fd, buf, (int)count);
1812#else
1813 n = write(fd, buf, count);
1814#endif
1815 err = errno;
1816 } while (n < 0 && err == EINTR);
1817 }
Steve Dower8fc89802015-04-12 00:26:27 -04001818 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001819
1820 if (async_err) {
1821 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001822 and the Python signal handler raised an exception (if gil_held is
1823 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001824 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001825 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001826 return -1;
1827 }
1828 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001829 if (gil_held)
1830 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001831 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001832 return -1;
1833 }
1834
1835 return n;
1836}
1837
Victor Stinner82c3e452015-04-01 18:34:45 +02001838/* Write count bytes of buf into fd.
1839
1840 On success, return the number of written bytes, it can be lower than count
1841 including 0. On error, raise an exception, set errno and return -1.
1842
1843 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1844 If the Python signal handler raises an exception, the function returns -1
1845 (the syscall is not retried).
1846
1847 Release the GIL to call write(). The caller must hold the GIL. */
1848Py_ssize_t
1849_Py_write(int fd, const void *buf, size_t count)
1850{
Victor Stinner8a1be612016-03-14 22:07:55 +01001851 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001852
Victor Stinner82c3e452015-04-01 18:34:45 +02001853 /* _Py_write() must not be called with an exception set, otherwise the
1854 * caller may think that write() was interrupted by a signal and the signal
1855 * handler raised an exception. */
1856 assert(!PyErr_Occurred());
1857
1858 return _Py_write_impl(fd, buf, count, 1);
1859}
1860
1861/* Write count bytes of buf into fd.
1862 *
1863 * On success, return the number of written bytes, it can be lower than count
1864 * including 0. On error, set errno and return -1.
1865 *
1866 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1867 * without calling the Python signal handler. */
1868Py_ssize_t
1869_Py_write_noraise(int fd, const void *buf, size_t count)
1870{
1871 return _Py_write_impl(fd, buf, count, 0);
1872}
1873
Victor Stinner4e314432010-10-07 21:45:39 +00001874#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +00001875
1876/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinner1be0d112019-03-18 17:47:26 +01001877 the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001878
Victor Stinner1be0d112019-03-18 17:47:26 +01001879 Return -1 on encoding error, on readlink() error, if the internal buffer is
1880 too short, on decoding error, or if 'buf' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001881int
Victor Stinner1be0d112019-03-18 17:47:26 +01001882_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001883{
1884 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001885 char cbuf[MAXPATHLEN];
Victor Stinner03a8a562019-10-04 02:22:39 +02001886 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
Victor Stinner3f711f42010-10-16 22:47:37 +00001887 wchar_t *wbuf;
Victor Stinner03a8a562019-10-04 02:22:39 +02001888 Py_ssize_t res;
Victor Stinner4e314432010-10-07 21:45:39 +00001889 size_t r1;
1890
Victor Stinner9dd76202017-12-21 16:20:32 +01001891 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001892 if (cpath == NULL) {
1893 errno = EINVAL;
1894 return -1;
1895 }
Victor Stinner03a8a562019-10-04 02:22:39 +02001896 res = readlink(cpath, cbuf, cbuf_len);
Victor Stinner9dd76202017-12-21 16:20:32 +01001897 PyMem_RawFree(cpath);
Victor Stinner03a8a562019-10-04 02:22:39 +02001898 if (res == -1) {
Victor Stinner4e314432010-10-07 21:45:39 +00001899 return -1;
Victor Stinner03a8a562019-10-04 02:22:39 +02001900 }
1901 if ((size_t)res == cbuf_len) {
Victor Stinner4e314432010-10-07 21:45:39 +00001902 errno = EINVAL;
1903 return -1;
1904 }
1905 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001906 wbuf = Py_DecodeLocale(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +00001907 if (wbuf == NULL) {
1908 errno = EINVAL;
1909 return -1;
1910 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001911 /* wbuf must have space to store the trailing NUL character */
1912 if (buflen <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001913 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001914 errno = EINVAL;
1915 return -1;
1916 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001917 wcsncpy(buf, wbuf, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001918 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001919 return (int)r1;
1920}
1921#endif
1922
1923#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001924
1925/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001926 encoding, decode the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001927
Victor Stinner1be0d112019-03-18 17:47:26 +01001928 Return NULL on encoding error, realpath() error, decoding error
1929 or if 'resolved_path' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001930wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001931_Py_wrealpath(const wchar_t *path,
Victor Stinner1be0d112019-03-18 17:47:26 +01001932 wchar_t *resolved_path, size_t resolved_path_len)
Victor Stinner4e314432010-10-07 21:45:39 +00001933{
1934 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001935 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001936 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001937 char *res;
1938 size_t r;
Victor Stinner9dd76202017-12-21 16:20:32 +01001939 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001940 if (cpath == NULL) {
1941 errno = EINVAL;
1942 return NULL;
1943 }
1944 res = realpath(cpath, cresolved_path);
Victor Stinner9dd76202017-12-21 16:20:32 +01001945 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001946 if (res == NULL)
1947 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001948
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001949 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001950 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001951 errno = EINVAL;
1952 return NULL;
1953 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001954 /* wresolved_path must have space to store the trailing NUL character */
1955 if (resolved_path_len <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001956 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001957 errno = EINVAL;
1958 return NULL;
1959 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001960 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001961 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001962 return resolved_path;
1963}
1964#endif
1965
Victor Stinner3939c322019-06-25 15:02:43 +02001966
1967#ifndef MS_WINDOWS
1968int
1969_Py_isabs(const wchar_t *path)
1970{
1971 return (path[0] == SEP);
1972}
1973#endif
1974
1975
1976/* Get an absolute path.
1977 On error (ex: fail to get the current directory), return -1.
1978 On memory allocation failure, set *abspath_p to NULL and return 0.
1979 On success, return a newly allocated to *abspath_p to and return 0.
1980 The string must be freed by PyMem_RawFree(). */
1981int
1982_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1983{
1984#ifdef MS_WINDOWS
1985 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1986 DWORD result;
1987
1988 result = GetFullPathNameW(path,
1989 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1990 NULL);
1991 if (!result) {
1992 return -1;
1993 }
1994
1995 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1996 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1997 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1998 }
1999 else {
2000 woutbufp = NULL;
2001 }
2002 if (!woutbufp) {
2003 *abspath_p = NULL;
2004 return 0;
2005 }
2006
2007 result = GetFullPathNameW(path, result, woutbufp, NULL);
2008 if (!result) {
2009 PyMem_RawFree(woutbufp);
2010 return -1;
2011 }
2012 }
2013
2014 if (woutbufp != woutbuf) {
2015 *abspath_p = woutbufp;
2016 return 0;
2017 }
2018
2019 *abspath_p = _PyMem_RawWcsdup(woutbufp);
2020 return 0;
2021#else
2022 if (_Py_isabs(path)) {
2023 *abspath_p = _PyMem_RawWcsdup(path);
2024 return 0;
2025 }
2026
2027 wchar_t cwd[MAXPATHLEN + 1];
2028 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2029 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2030 /* unable to get the current directory */
2031 return -1;
2032 }
2033
2034 size_t cwd_len = wcslen(cwd);
2035 size_t path_len = wcslen(path);
2036 size_t len = cwd_len + 1 + path_len + 1;
2037 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2038 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2039 }
2040 else {
2041 *abspath_p = NULL;
2042 }
2043 if (*abspath_p == NULL) {
2044 return 0;
2045 }
2046
2047 wchar_t *abspath = *abspath_p;
2048 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2049 abspath += cwd_len;
2050
2051 *abspath = (wchar_t)SEP;
2052 abspath++;
2053
2054 memcpy(abspath, path, path_len * sizeof(wchar_t));
2055 abspath += path_len;
2056
2057 *abspath = 0;
2058 return 0;
2059#endif
2060}
2061
2062
Victor Stinnerfaddaed2019-03-19 02:58:14 +01002063/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01002064 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00002065
Victor Stinner1be0d112019-03-18 17:47:26 +01002066 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2067 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00002068wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01002069_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00002070{
2071#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01002072 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2073 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00002074#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01002075 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00002076 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00002077 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00002078
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01002079 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00002080 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02002081 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00002082 if (wname == NULL)
2083 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01002084 /* wname must have space to store the trailing NUL character */
2085 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02002086 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00002087 return NULL;
2088 }
Victor Stinner1be0d112019-03-18 17:47:26 +01002089 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02002090 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00002091 return buf;
2092#endif
2093}
2094
Victor Stinnerdaf45552013-08-28 00:53:59 +02002095/* Duplicate a file descriptor. The new file descriptor is created as
2096 non-inheritable. Return a new file descriptor on success, raise an OSError
2097 exception and return -1 on error.
2098
2099 The GIL is released to call dup(). The caller must hold the GIL. */
2100int
2101_Py_dup(int fd)
2102{
2103#ifdef MS_WINDOWS
2104 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002105#endif
2106
Victor Stinner8a1be612016-03-14 22:07:55 +01002107 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01002108
Victor Stinnerdaf45552013-08-28 00:53:59 +02002109#ifdef MS_WINDOWS
Segev Finer5e437fb2021-04-24 01:00:27 +03002110 handle = _Py_get_osfhandle(fd);
2111 if (handle == INVALID_HANDLE_VALUE)
Victor Stinnerdaf45552013-08-28 00:53:59 +02002112 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002113
Victor Stinnerdaf45552013-08-28 00:53:59 +02002114 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002115 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002116 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002117 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002118 Py_END_ALLOW_THREADS
2119 if (fd < 0) {
2120 PyErr_SetFromErrno(PyExc_OSError);
2121 return -1;
2122 }
2123
Zackery Spytz28fca0c2019-06-17 01:17:14 -06002124 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2125 _Py_BEGIN_SUPPRESS_IPH
2126 close(fd);
2127 _Py_END_SUPPRESS_IPH
2128 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002129 }
2130#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2131 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002132 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002133 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002134 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002135 Py_END_ALLOW_THREADS
2136 if (fd < 0) {
2137 PyErr_SetFromErrno(PyExc_OSError);
2138 return -1;
2139 }
2140
2141#else
2142 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002143 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002144 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002145 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002146 Py_END_ALLOW_THREADS
2147 if (fd < 0) {
2148 PyErr_SetFromErrno(PyExc_OSError);
2149 return -1;
2150 }
2151
2152 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002153 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002154 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002155 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002156 return -1;
2157 }
2158#endif
2159 return fd;
2160}
2161
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002162#ifndef MS_WINDOWS
2163/* Get the blocking mode of the file descriptor.
2164 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2165 raise an exception and return -1 on error. */
2166int
2167_Py_get_blocking(int fd)
2168{
Steve Dower8fc89802015-04-12 00:26:27 -04002169 int flags;
2170 _Py_BEGIN_SUPPRESS_IPH
2171 flags = fcntl(fd, F_GETFL, 0);
2172 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002173 if (flags < 0) {
2174 PyErr_SetFromErrno(PyExc_OSError);
2175 return -1;
2176 }
2177
2178 return !(flags & O_NONBLOCK);
2179}
2180
2181/* Set the blocking mode of the specified file descriptor.
2182
2183 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2184 otherwise.
2185
2186 Return 0 on success, raise an exception and return -1 on error. */
2187int
2188_Py_set_blocking(int fd, int blocking)
2189{
pxinwr06afac62020-12-08 04:41:12 +08002190/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2191 Use fcntl() instead. */
2192#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002193 int arg = !blocking;
2194 if (ioctl(fd, FIONBIO, &arg) < 0)
2195 goto error;
2196#else
2197 int flags, res;
2198
Steve Dower8fc89802015-04-12 00:26:27 -04002199 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002200 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002201 if (flags >= 0) {
2202 if (blocking)
2203 flags = flags & (~O_NONBLOCK);
2204 else
2205 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002206
Steve Dower8fc89802015-04-12 00:26:27 -04002207 res = fcntl(fd, F_SETFL, flags);
2208 } else {
2209 res = -1;
2210 }
2211 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002212
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002213 if (res < 0)
2214 goto error;
2215#endif
2216 return 0;
2217
2218error:
2219 PyErr_SetFromErrno(PyExc_OSError);
2220 return -1;
2221}
Segev Finer5e437fb2021-04-24 01:00:27 +03002222#else /* MS_WINDOWS */
2223void*
2224_Py_get_osfhandle_noraise(int fd)
2225{
2226 void *handle;
2227 _Py_BEGIN_SUPPRESS_IPH
2228 handle = (void*)_get_osfhandle(fd);
2229 _Py_END_SUPPRESS_IPH
2230 return handle;
2231}
Victor Stinnercb064fc2018-01-15 15:58:02 +01002232
Segev Finer5e437fb2021-04-24 01:00:27 +03002233void*
2234_Py_get_osfhandle(int fd)
2235{
2236 void *handle = _Py_get_osfhandle_noraise(fd);
2237 if (handle == INVALID_HANDLE_VALUE)
2238 PyErr_SetFromErrno(PyExc_OSError);
2239
2240 return handle;
2241}
2242
2243int
2244_Py_open_osfhandle_noraise(void *handle, int flags)
2245{
2246 int fd;
2247 _Py_BEGIN_SUPPRESS_IPH
2248 fd = _open_osfhandle((intptr_t)handle, flags);
2249 _Py_END_SUPPRESS_IPH
2250 return fd;
2251}
2252
2253int
2254_Py_open_osfhandle(void *handle, int flags)
2255{
2256 int fd = _Py_open_osfhandle_noraise(handle, flags);
2257 if (fd == -1)
2258 PyErr_SetFromErrno(PyExc_OSError);
2259
2260 return fd;
2261}
2262#endif /* MS_WINDOWS */
Victor Stinnercb064fc2018-01-15 15:58:02 +01002263
2264int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002265_Py_GetLocaleconvNumeric(struct lconv *lc,
2266 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002267{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002268 assert(decimal_point != NULL);
2269 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002270
TIGirardif2312032020-10-20 08:39:52 -03002271#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002272 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002273 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002274 change_locale = 1;
2275 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002276 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002277 change_locale = 1;
2278 }
2279
2280 /* Keep a copy of the LC_CTYPE locale */
2281 char *oldloc = NULL, *loc = NULL;
2282 if (change_locale) {
2283 oldloc = setlocale(LC_CTYPE, NULL);
2284 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002285 PyErr_SetString(PyExc_RuntimeWarning,
2286 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002287 return -1;
2288 }
2289
2290 oldloc = _PyMem_Strdup(oldloc);
2291 if (!oldloc) {
2292 PyErr_NoMemory();
2293 return -1;
2294 }
2295
2296 loc = setlocale(LC_NUMERIC, NULL);
2297 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2298 loc = NULL;
2299 }
2300
2301 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002302 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002303 if LC_NUMERIC locale is different than LC_CTYPE locale and
2304 decimal_point and/or thousands_sep are non-ASCII or longer than
2305 1 byte */
2306 setlocale(LC_CTYPE, loc);
2307 }
2308 }
2309
TIGirardif2312032020-10-20 08:39:52 -03002310#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2311#else /* MS_WINDOWS */
2312/* Use _W_* fields of Windows strcut lconv */
2313#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2314#endif /* MS_WINDOWS */
2315
Victor Stinner02e6bf72018-11-20 16:20:16 +01002316 int res = -1;
2317
TIGirardif2312032020-10-20 08:39:52 -03002318 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002319 if (*decimal_point == NULL) {
2320 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002321 }
2322
TIGirardif2312032020-10-20 08:39:52 -03002323 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002324 if (*thousands_sep == NULL) {
2325 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002326 }
2327
2328 res = 0;
2329
Victor Stinner02e6bf72018-11-20 16:20:16 +01002330done:
TIGirardif2312032020-10-20 08:39:52 -03002331#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002332 if (loc != NULL) {
2333 setlocale(LC_CTYPE, oldloc);
2334 }
2335 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002336#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002337 return res;
TIGirardif2312032020-10-20 08:39:52 -03002338
2339#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002340}
Kyle Evans79925792020-10-13 15:04:44 -05002341
/* How _Py_closerange() picks the primitive used to close a range of fds:
 *
 * 1.  close_range(2), when available, is always preferred: for a contiguous
 *     range it beats fdwalk(3), which has to iterate over the entire fd
 *     space and simply does nothing for descriptors outside the range.
 * 2.  closefrom(2), when available, is attempted next, provided we are
 *     closing up to sysconf(_SC_OPEN_MAX).
 * 2a. When we are not closing up to sysconf(_SC_OPEN_MAX), fall back to
 *     fdwalk(3), which is more performant if the range happens to contain
 *     any chunk of non-opened fds in the middle.
 * 2b. When fdwalk(3) is not available either, run a plain close(2) loop.
 *
 * USE_CLOSEFROM is only enabled on FreeBSD; USE_FDWALK mirrors the
 * HAVE_FDWALK configuration macro.
 */
#if defined(__FreeBSD__)
#  define USE_CLOSEFROM
#endif /* defined(__FreeBSD__) */

#if defined(HAVE_FDWALK)
#  define USE_FDWALK
#endif /* defined(HAVE_FDWALK) */
2360
2361#ifdef USE_FDWALK
/* fdwalk() callback that closes 'fd' when it lies in a half-open range.

   'lohi' points to two ints: lohi[0] is the first descriptor to close
   (inclusive) and lohi[1] is the end of the range (exclusive).

   Returns 1 once 'fd' has reached or passed the end of the range, and 0
   otherwise.  Descriptors below the range are left untouched; errors from
   close() are deliberately ignored. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int range_start = bounds[0];
    const int range_stop = bounds[1];

    if (fd >= range_stop) {
        /* Past the range: nothing left to close. */
        return 1;
    }
    if (fd >= range_start) {
        /* Inside the range: best-effort close. */
        (void)close(fd);
    }
    return 0;
}
2377#endif /* USE_FDWALK */
2378
2379/* Closes all file descriptors in [first, last], ignoring errors. */
2380void
2381_Py_closerange(int first, int last)
2382{
2383 first = Py_MAX(first, 0);
2384 _Py_BEGIN_SUPPRESS_IPH
2385#ifdef HAVE_CLOSE_RANGE
2386 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2387 /* Any errors encountered while closing file descriptors are ignored;
2388 * ENOSYS means no kernel support, though,
2389 * so we'll fallback to the other methods. */
2390 }
2391 else
2392#endif /* HAVE_CLOSE_RANGE */
2393#ifdef USE_CLOSEFROM
2394 if (last >= sysconf(_SC_OPEN_MAX)) {
2395 /* Any errors encountered while closing file descriptors are ignored */
2396 closefrom(first);
2397 }
2398 else
2399#endif /* USE_CLOSEFROM */
2400#ifdef USE_FDWALK
2401 {
2402 int lohi[2];
2403 lohi[0] = first;
2404 lohi[1] = last + 1;
2405 fdwalk(_fdwalk_close_func, lohi);
2406 }
2407#else
2408 {
2409 for (int i = first; i <= last; i++) {
2410 /* Ignore errors */
2411 (void)close(i);
2412 }
2413 }
2414#endif /* USE_FDWALK */
2415 _Py_END_SUPPRESS_IPH
2416}