blob: 5177b3728824cdebc6c2afcb01da578697e1cc4c [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
21#ifdef HAVE_FCNTL_H
22#include <fcntl.h>
23#endif /* HAVE_FCNTL_H */
24
Victor Stinnerdaf45552013-08-28 00:53:59 +020025#ifdef O_CLOEXEC
/* Tri-state cache recording whether open() honours the O_CLOEXEC flag:

   -1: not determined yet
    0: open() ignores O_CLOEXEC (for example Linux kernels older than 2.6.23)
    1: open() supports O_CLOEXEC and the close-on-exec flag gets set

   Consulted by _Py_open(), _Py_open_noraise(), io.FileIO and os.open(). */
int _Py_open_cloexec_works = -1;
35#endif
36
Victor Stinner3d4226a2018-08-29 22:21:32 +020037
38static int
39get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
40{
41 switch (errors)
42 {
43 case _Py_ERROR_STRICT:
44 *surrogateescape = 0;
45 return 0;
46 case _Py_ERROR_SURROGATEESCAPE:
47 *surrogateescape = 1;
48 return 0;
49 default:
50 return -1;
51 }
52}
53
54
Brett Cannonefb00c02012-02-29 18:31:31 -050055PyObject *
56_Py_device_encoding(int fd)
57{
Victor Stinner14b9b112013-06-25 00:37:25 +020058#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050059 UINT cp;
60#endif
Steve Dower8fc89802015-04-12 00:26:27 -040061 int valid;
62 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070063 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040064 _Py_END_SUPPRESS_IPH
65 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050066 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040067
Victor Stinner14b9b112013-06-25 00:37:25 +020068#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050069 if (fd == 0)
70 cp = GetConsoleCP();
71 else if (fd == 1 || fd == 2)
72 cp = GetConsoleOutputCP();
73 else
74 cp = 0;
75 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
76 has no console */
77 if (cp != 0)
78 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
79#elif defined(CODESET)
80 {
81 char *codeset = nl_langinfo(CODESET);
82 if (codeset != NULL && codeset[0] != 0)
83 return PyUnicode_FromString(codeset);
84 }
85#endif
86 Py_RETURN_NONE;
87}
88
Victor Stinnere2510952019-05-02 11:28:57 -040089#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +010090
91#define USE_FORCE_ASCII
92
Victor Stinnerd45c7f82012-12-04 01:34:47 +010093extern int _Py_normalize_encoding(const char *, char *, size_t);
94
/* Work around a locale encoding issue seen on FreeBSD and OpenIndiana with
   the C and POSIX locales: nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() actually use ISO-8859-1.
   This matters because os.fsencode() and os.fsdecode() use the
   locale.getpreferredencoding() codec: if command line arguments are decoded
   by mbstowcs() and encoded back by os.fsencode(), a UnicodeEncodeError is
   raised instead of getting the original byte string back.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" (or
   "POSIX"), nl_langinfo(CODESET) announces "ascii" (or an alias of ASCII),
   and at least one byte in the range 0x80-0xff can be decoded from the locale
   encoding. It is also enabled on error, for example if getting the locale
   failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice: ASCII is forced
   in this case as well.

   Values of force_ascii:

    1: the workaround is used: Py_EncodeLocale() uses encode_ascii() and
       Py_DecodeLocale() uses decode_ascii()
    0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
       Py_DecodeLocale() uses mbstowcs()
   -1: unknown, check_force_ascii() must be called to get the value
*/
static int force_ascii = -1;
123
/* Decide whether the ASCII workaround must be used for the locale encoding.

   Return 1 if the ASCII encoding must be forced, 0 otherwise:

   - 0 if the LC_CTYPE locale is neither "C" nor "POSIX";
   - 1 if nl_langinfo(CODESET) is not available at build time;
   - on HP-UX: 1 if the codeset is "roman8" but mbstowcs() decodes byte 0xA7
     like Latin1 does, 0 otherwise;
   - elsewhere: 0 if the codeset is not an alias of ASCII; 1 if the codeset
     claims to be ASCII but mbstowcs() can decode at least one byte in the
     range 0x80-0xff; 0 if none of those bytes can be decoded;
   - 1 on error (the locale or the codeset cannot be retrieved or
     normalized).

   The bytes handed to mbstowcs() are always NUL-terminated: mbstowcs()
   expects a multibyte *string*, and an implementation which treats the probed
   byte as the start of a longer sequence would otherwise read past the end
   of the buffer. */
static int
check_force_ascii(void)
{
    const char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* NUL-terminated one-byte probe string */
        const unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = mbstowcs(&wch, (const char *)probe, 1);
        if (res != (size_t)-1 && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char *const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char *const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* one probed byte followed by the NUL terminator mbstowcs() needs */
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
223
Victor Stinnerd500e532018-08-28 17:27:36 +0200224
225int
226_Py_GetForceASCII(void)
227{
228 if (force_ascii == -1) {
229 force_ascii = check_force_ascii();
230 }
231 return force_ascii;
232}
233
234
Victor Stinner353933e2018-11-23 13:08:26 +0100235void
236_Py_ResetForceASCII(void)
237{
238 force_ascii = -1;
239}
240
241
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100242static int
243encode_ascii(const wchar_t *text, char **str,
244 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200245 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100246{
247 char *result = NULL, *out;
248 size_t len, i;
249 wchar_t ch;
250
Victor Stinner3d4226a2018-08-29 22:21:32 +0200251 int surrogateescape;
252 if (get_surrogateescape(errors, &surrogateescape) < 0) {
253 return -3;
254 }
255
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100256 len = wcslen(text);
257
Victor Stinner9bee3292017-12-21 16:49:13 +0100258 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100259 if (raw_malloc) {
260 result = PyMem_RawMalloc(len + 1);
261 }
262 else {
263 result = PyMem_Malloc(len + 1);
264 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100265 if (result == NULL) {
266 return -1;
267 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100268
269 out = result;
270 for (i=0; i<len; i++) {
271 ch = text[i];
272
273 if (ch <= 0x7f) {
274 /* ASCII character */
275 *out++ = (char)ch;
276 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100277 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100278 /* UTF-8b surrogate */
279 *out++ = (char)(ch - 0xdc00);
280 }
281 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100282 if (raw_malloc) {
283 PyMem_RawFree(result);
284 }
285 else {
286 PyMem_Free(result);
287 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100288 if (error_pos != NULL) {
289 *error_pos = i;
290 }
291 if (reason) {
292 *reason = "encoding error";
293 }
294 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100295 }
296 }
297 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100298 *str = result;
299 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100300}
Victor Stinnerd500e532018-08-28 17:27:36 +0200301#else
/* Variant built when the force-ASCII workaround is compiled out: the ASCII
   encoding is never forced, so always report 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100307
/* Variant built when the force-ASCII workaround is compiled out: there is
   no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
    return;
}
Victor Stinnere2510952019-05-02 11:28:57 -0400313#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100314
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100315
316#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
317static int
318decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200319 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100320{
321 wchar_t *res;
322 unsigned char *in;
323 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600324 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100325
Victor Stinner3d4226a2018-08-29 22:21:32 +0200326 int surrogateescape;
327 if (get_surrogateescape(errors, &surrogateescape) < 0) {
328 return -3;
329 }
330
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100331 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
332 return -1;
333 }
334 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
335 if (!res) {
336 return -1;
337 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100338
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100339 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100340 for (in = (unsigned char*)arg; *in; in++) {
341 unsigned char ch = *in;
342 if (ch < 128) {
343 *out++ = ch;
344 }
345 else {
346 if (!surrogateescape) {
347 PyMem_RawFree(res);
348 if (wlen) {
349 *wlen = in - (unsigned char*)arg;
350 }
351 if (reason) {
352 *reason = "decoding error";
353 }
354 return -2;
355 }
356 *out++ = 0xdc00 + ch;
357 }
358 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100359 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100360
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100361 if (wlen != NULL) {
362 *wlen = out - res;
363 }
364 *wstr = res;
365 return 0;
366}
367#endif /* !HAVE_MBRTOWC */
368
369static int
370decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200371 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000372{
373 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100374 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000375 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200376#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000377 unsigned char *in;
378 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000379 mbstate_t mbs;
380#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100381
Victor Stinner3d4226a2018-08-29 22:21:32 +0200382 int surrogateescape;
383 if (get_surrogateescape(errors, &surrogateescape) < 0) {
384 return -3;
385 }
386
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100387#ifdef HAVE_BROKEN_MBSTOWCS
388 /* Some platforms have a broken implementation of
389 * mbstowcs which does not count the characters that
390 * would result from conversion. Use an upper bound.
391 */
392 argsize = strlen(arg);
393#else
394 argsize = mbstowcs(NULL, arg, 0);
395#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000396 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100397 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
398 return -1;
399 }
400 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
401 if (!res) {
402 return -1;
403 }
404
405 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000406 if (count != (size_t)-1) {
407 wchar_t *tmp;
408 /* Only use the result if it contains no
409 surrogate characters. */
410 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100411 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000412 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000413 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100414 if (wlen != NULL) {
415 *wlen = count;
416 }
417 *wstr = res;
418 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000419 }
Victor Stinner4e314432010-10-07 21:45:39 +0000420 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200421 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000422 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100423
Victor Stinner4e314432010-10-07 21:45:39 +0000424 /* Conversion failed. Fall back to escaping with surrogateescape. */
425#ifdef HAVE_MBRTOWC
426 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
427
428 /* Overallocate; as multi-byte characters are in the argument, the
429 actual output could use less memory. */
430 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100431 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
432 return -1;
433 }
434 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
435 if (!res) {
436 return -1;
437 }
438
Victor Stinner4e314432010-10-07 21:45:39 +0000439 in = (unsigned char*)arg;
440 out = res;
441 memset(&mbs, 0, sizeof mbs);
442 while (argsize) {
443 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100444 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000445 /* Reached end of string; null char stored. */
446 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100447 }
448
Victor Stinner4e314432010-10-07 21:45:39 +0000449 if (converted == (size_t)-2) {
450 /* Incomplete character. This should never happen,
451 since we provide everything that we have -
452 unless there is a bug in the C library, or I
453 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100454 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000455 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100456
Victor Stinner4e314432010-10-07 21:45:39 +0000457 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100458 if (!surrogateescape) {
459 goto decode_error;
460 }
461
Victor Stinner4e314432010-10-07 21:45:39 +0000462 /* Conversion error. Escape as UTF-8b, and start over
463 in the initial shift state. */
464 *out++ = 0xdc00 + *in++;
465 argsize--;
466 memset(&mbs, 0, sizeof mbs);
467 continue;
468 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100469
Victor Stinner76df43d2012-10-30 01:42:39 +0100470 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100471 if (!surrogateescape) {
472 goto decode_error;
473 }
474
Victor Stinner4e314432010-10-07 21:45:39 +0000475 /* Surrogate character. Escape the original
476 byte sequence with surrogateescape. */
477 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100478 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000479 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100480 }
Victor Stinner4e314432010-10-07 21:45:39 +0000481 continue;
482 }
483 /* successfully converted some bytes */
484 in += converted;
485 argsize -= converted;
486 out++;
487 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100488 if (wlen != NULL) {
489 *wlen = out - res;
490 }
491 *wstr = res;
492 return 0;
493
494decode_error:
495 PyMem_RawFree(res);
496 if (wlen) {
497 *wlen = in - (unsigned char*)arg;
498 }
499 if (reason) {
500 *reason = "decoding error";
501 }
502 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100503#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000504 /* Cannot use C locale for escaping; manually escape as if charset
505 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
506 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200507 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100508#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100509}
510
511
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100512/* Decode a byte string from the locale encoding.
513
514 Use the strict error handler if 'surrogateescape' is zero. Use the
515 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
516 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
517 can be decoded as a surrogate character, escape the bytes using the
518 surrogateescape error handler instead of decoding them.
519
Ville Skyttä61f82e02018-04-20 23:08:45 +0300520 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100521 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
522 the number of wide characters excluding the null character into *wlen.
523
524 On memory allocation failure, return -1.
525
526 On decoding error, return -2. If wlen is not NULL, write the start of
527 invalid byte sequence in the input string into *wlen. If reason is not NULL,
528 write the decoding error message into *reason.
529
Victor Stinner3d4226a2018-08-29 22:21:32 +0200530 Return -3 if the error handler 'errors' is not supported.
531
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100532 Use the Py_EncodeLocaleEx() function to encode the character string back to
533 a byte string. */
534int
535_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
536 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200537 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100538{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100539 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400540#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100541 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200542 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100543#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200544 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100545#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100546 }
547
Victor Stinnere2510952019-05-02 11:28:57 -0400548#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100549 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200550 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100551#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200552 int use_utf8 = (Py_UTF8Mode == 1);
553#ifdef MS_WINDOWS
554 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
555#endif
556 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200557 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
558 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100559 }
560
561#ifdef USE_FORCE_ASCII
562 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100563 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100564 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100565
566 if (force_ascii) {
567 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200568 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100569 }
570#endif
571
Victor Stinner3d4226a2018-08-29 22:21:32 +0200572 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400573#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100574}
575
576
Victor Stinner91106cd2017-12-13 12:29:09 +0100577/* Decode a byte string from the locale encoding with the
578 surrogateescape error handler: undecodable bytes are decoded as characters
579 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
580 character, escape the bytes using the surrogateescape error handler instead
581 of decoding them.
582
583 Return a pointer to a newly allocated wide character string, use
584 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
585 wide characters excluding the null character into *size
586
587 Return NULL on decoding error or memory allocation error. If *size* is not
588 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
589 decoding error.
590
591 Decoding errors should never happen, unless there is a bug in the C
592 library.
593
594 Use the Py_EncodeLocale() function to encode the character string back to a
595 byte string. */
596wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100597Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100598{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100599 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200600 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
601 NULL, 0,
602 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100603 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200604 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100605 if (wlen != NULL) {
606 *wlen = (size_t)res;
607 }
608 return NULL;
609 }
610 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100611}
Victor Stinner91106cd2017-12-13 12:29:09 +0100612
Victor Stinner91106cd2017-12-13 12:29:09 +0100613
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100614static int
615encode_current_locale(const wchar_t *text, char **str,
616 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200617 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100618{
Victor Stinner4e314432010-10-07 21:45:39 +0000619 const size_t len = wcslen(text);
620 char *result = NULL, *bytes = NULL;
621 size_t i, size, converted;
622 wchar_t c, buf[2];
623
Victor Stinner3d4226a2018-08-29 22:21:32 +0200624 int surrogateescape;
625 if (get_surrogateescape(errors, &surrogateescape) < 0) {
626 return -3;
627 }
628
Victor Stinner4e314432010-10-07 21:45:39 +0000629 /* The function works in two steps:
630 1. compute the length of the output buffer in bytes (size)
631 2. outputs the bytes */
632 size = 0;
633 buf[1] = 0;
634 while (1) {
635 for (i=0; i < len; i++) {
636 c = text[i];
637 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100638 if (!surrogateescape) {
639 goto encode_error;
640 }
Victor Stinner4e314432010-10-07 21:45:39 +0000641 /* UTF-8b surrogate */
642 if (bytes != NULL) {
643 *bytes++ = c - 0xdc00;
644 size--;
645 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100646 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000647 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100648 }
Victor Stinner4e314432010-10-07 21:45:39 +0000649 continue;
650 }
651 else {
652 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100653 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000654 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100655 }
656 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000657 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100658 }
Victor Stinner4e314432010-10-07 21:45:39 +0000659 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100660 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000661 }
662 if (bytes != NULL) {
663 bytes += converted;
664 size -= converted;
665 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100666 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000667 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100668 }
Victor Stinner4e314432010-10-07 21:45:39 +0000669 }
670 }
671 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100672 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000673 break;
674 }
675
676 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100677 if (raw_malloc) {
678 result = PyMem_RawMalloc(size);
679 }
680 else {
681 result = PyMem_Malloc(size);
682 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100683 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100684 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100685 }
Victor Stinner4e314432010-10-07 21:45:39 +0000686 bytes = result;
687 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100688 *str = result;
689 return 0;
690
691encode_error:
692 if (raw_malloc) {
693 PyMem_RawFree(result);
694 }
695 else {
696 PyMem_Free(result);
697 }
698 if (error_pos != NULL) {
699 *error_pos = i;
700 }
701 if (reason) {
702 *reason = "encoding error";
703 }
704 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100705}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100706
Victor Stinner3d4226a2018-08-29 22:21:32 +0200707
708/* Encode a string to the locale encoding.
709
710 Parameters:
711
712 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
713 of PyMem_Malloc().
714 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
715 Python filesystem encoding.
716 * errors: error handler like "strict" or "surrogateescape".
717
718 Return value:
719
720 0: success, *str is set to a newly allocated decoded string.
721 -1: memory allocation failure
722 -2: encoding error, set *error_pos and *reason (if set).
723 -3: the error handler 'errors' is not supported.
724 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100725static int
726encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
727 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200728 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100729{
730 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400731#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100732 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200733 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100734#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100735 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200736 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100737#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100738 }
739
Victor Stinnere2510952019-05-02 11:28:57 -0400740#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100741 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200742 raw_malloc, errors);
743#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200744 int use_utf8 = (Py_UTF8Mode == 1);
745#ifdef MS_WINDOWS
746 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
747#endif
748 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100749 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200750 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100751 }
752
753#ifdef USE_FORCE_ASCII
754 if (force_ascii == -1) {
755 force_ascii = check_force_ascii();
756 }
757
758 if (force_ascii) {
759 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200760 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100761 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100762#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100763
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100764 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200765 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400766#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100767}
768
Victor Stinner9dd76202017-12-21 16:20:32 +0100769static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100770encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100771 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100772{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100773 char *str;
774 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200775 raw_malloc, current_locale,
776 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100777 if (res != -2 && error_pos) {
778 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100779 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100780 if (res != 0) {
781 return NULL;
782 }
783 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100784}
785
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, and to (size_t)-1 otherwise.

   Use the Py_DecodeLocale() function to decode the bytes string back to a
   wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100803
Victor Stinner91106cd2017-12-13 12:29:09 +0100804
/* Same as Py_EncodeLocale(), except that the result must be released with
   PyMem_RawFree() instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
812
813
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100814int
815_Py_EncodeLocaleEx(const wchar_t *text, char **str,
816 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200817 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100818{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100819 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200820 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000821}
822
Victor Stinner6672d0c2010-10-07 22:53:43 +0000823
Victor Stinner82458b62020-11-01 20:59:35 +0100824// Get the current locale encoding name:
825//
826// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
827// - Return "UTF-8" if the UTF-8 Mode is enabled
828// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100829// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100830// - Otherwise, return nl_langinfo(CODESET).
831//
Victor Stinnere662c392020-11-01 23:07:23 +0100832// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100833//
Victor Stinner710e8262020-10-31 01:02:09 +0100834// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100835wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100836_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100837{
838#ifdef _Py_FORCE_UTF8_LOCALE
839 // On Android langinfo.h and CODESET are missing,
840 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100841 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100842#else
843 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
844 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100845 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100846 }
847
Victor Stinner82458b62020-11-01 20:59:35 +0100848#ifdef MS_WINDOWS
849 wchar_t encoding[23];
850 unsigned int ansi_codepage = GetACP();
851 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
852 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
853 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100854#else
855 const char *encoding = nl_langinfo(CODESET);
856 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100857 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
858 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100859 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100860 }
Victor Stinner710e8262020-10-31 01:02:09 +0100861
Victor Stinner82458b62020-11-01 20:59:35 +0100862 wchar_t *wstr;
863 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100864 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100865 if (res < 0) {
866 return NULL;
867 }
868 return wstr;
869#endif // !MS_WINDOWS
870
871#endif // !_Py_FORCE_UTF8_LOCALE
872}
873
874
875PyObject *
876_Py_GetLocaleEncodingObject(void)
877{
Victor Stinnere662c392020-11-01 23:07:23 +0100878 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100879 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100880 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100881 return NULL;
882 }
883
884 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
885 PyMem_RawFree(encoding);
886 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100887}
888
889
Steve Dowerf2f373f2015-02-21 08:44:05 -0800890#ifdef MS_WINDOWS
891static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
892
893static void
894FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
895{
896 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
897 /* Cannot simply cast and dereference in_ptr,
898 since it might not be aligned properly */
899 __int64 in;
900 memcpy(&in, in_ptr, sizeof(in));
901 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
902 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
903}
904
905void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800906_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800907{
908 /* XXX endianness */
909 __int64 out;
910 out = time_in + secs_between_epochs;
911 out = out * 10000000 + nsec_in / 100;
912 memcpy(out_ptr, &out, sizeof(out));
913}
914
915/* Below, we *know* that ugo+r is 0444 */
916#if _S_IREAD != 0400
917#error Unsupported C library
918#endif
919static int
920attributes_to_mode(DWORD attr)
921{
922 int m = 0;
923 if (attr & FILE_ATTRIBUTE_DIRECTORY)
924 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
925 else
926 m |= _S_IFREG;
927 if (attr & FILE_ATTRIBUTE_READONLY)
928 m |= 0444;
929 else
930 m |= 0666;
931 return m;
932}
933
Steve Dowerbf1f3762015-02-21 15:26:02 -0800934void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200935_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
936 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800937{
938 memset(result, 0, sizeof(*result));
939 result->st_mode = attributes_to_mode(info->dwFileAttributes);
940 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
941 result->st_dev = info->dwVolumeSerialNumber;
942 result->st_rdev = result->st_dev;
943 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
944 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
945 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
946 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100947 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -0700948 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
949 open other name surrogate reparse points without traversing them. To
950 detect/handle these, check st_file_attributes and st_reparse_tag. */
951 result->st_reparse_tag = reparse_tag;
952 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
953 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -0800954 /* first clear the S_IFMT bits */
955 result->st_mode ^= (result->st_mode & S_IFMT);
956 /* now set the bits that make this a symlink */
957 result->st_mode |= S_IFLNK;
958 }
959 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800960}
961#endif
962
/* Return information about a file, without raising an exception.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE handle;
    int type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    type = GetFileType(handle);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Only disk files carry full information; for every other type at most
       st_mode is filled in and the rest of *status stays zeroed. */
    switch (type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001028
Victor Stinnere134a7f2015-03-30 10:09:31 +02001029/* Return information about a file.
1030
1031 On POSIX, use fstat().
1032
1033 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001034 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1035 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001036 #23152.
1037
1038 Raise an exception and return -1 on error. On Windows, set the last Windows
1039 error on error. On POSIX, set errno on error. Fill status and return 0 on
1040 success.
1041
Victor Stinner6f4fae82015-04-01 18:34:32 +02001042 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1043 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001044int
1045_Py_fstat(int fd, struct _Py_stat_struct *status)
1046{
1047 int res;
1048
Victor Stinner8a1be612016-03-14 22:07:55 +01001049 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001050
Victor Stinnere134a7f2015-03-30 10:09:31 +02001051 Py_BEGIN_ALLOW_THREADS
1052 res = _Py_fstat_noraise(fd, status);
1053 Py_END_ALLOW_THREADS
1054
1055 if (res != 0) {
1056#ifdef MS_WINDOWS
1057 PyErr_SetFromWindowsErr(0);
1058#else
1059 PyErr_SetFromErrno(PyExc_OSError);
1060#endif
1061 return -1;
1062 }
1063 return 0;
1064}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001065
Victor Stinner6672d0c2010-10-07 22:53:43 +00001066/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1067 call stat() otherwise. Only fill st_mode attribute on Windows.
1068
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001069 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1070 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001071
1072int
Victor Stinnera4a75952010-10-07 22:23:10 +00001073_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001074{
1075#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001076 int err;
1077 struct _stat wstatbuf;
1078
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001079#if USE_UNICODE_WCHAR_CACHE
1080 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1081#else /* USE_UNICODE_WCHAR_CACHE */
1082 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1083#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001084 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001085 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001086
Victor Stinneree587ea2011-11-17 00:51:38 +01001087 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001088 if (!err)
1089 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001090#if !USE_UNICODE_WCHAR_CACHE
1091 PyMem_Free(wpath);
1092#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001093 return err;
1094#else
1095 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001096 PyObject *bytes;
1097 char *cpath;
1098
1099 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001100 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001101 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001102
1103 /* check for embedded null bytes */
1104 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1105 Py_DECREF(bytes);
1106 return -2;
1107 }
1108
1109 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001110 Py_DECREF(bytes);
1111 return ret;
1112#endif
1113}
1114
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001115
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001116/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001117static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001118get_inheritable(int fd, int raise)
1119{
1120#ifdef MS_WINDOWS
1121 HANDLE handle;
1122 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001123
Steve Dower8fc89802015-04-12 00:26:27 -04001124 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001125 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001126 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001127 if (handle == INVALID_HANDLE_VALUE) {
1128 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001129 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001130 return -1;
1131 }
1132
1133 if (!GetHandleInformation(handle, &flags)) {
1134 if (raise)
1135 PyErr_SetFromWindowsErr(0);
1136 return -1;
1137 }
1138
1139 return (flags & HANDLE_FLAG_INHERIT);
1140#else
1141 int flags;
1142
1143 flags = fcntl(fd, F_GETFD, 0);
1144 if (flags == -1) {
1145 if (raise)
1146 PyErr_SetFromErrno(PyExc_OSError);
1147 return -1;
1148 }
1149 return !(flags & FD_CLOEXEC);
1150#endif
1151}
1152
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;
    return get_inheritable(fd, raise);
}
1161
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001162
1163/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001164static int
1165set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1166{
1167#ifdef MS_WINDOWS
1168 HANDLE handle;
1169 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001170#else
1171#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1172 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001173 int request;
1174 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001175#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001176 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001177 int res;
1178#endif
1179
1180 /* atomic_flag_works can only be used to make the file descriptor
1181 non-inheritable */
1182 assert(!(atomic_flag_works != NULL && inheritable));
1183
1184 if (atomic_flag_works != NULL && !inheritable) {
1185 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001186 int isInheritable = get_inheritable(fd, raise);
1187 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001188 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001189 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001190 }
1191
1192 if (*atomic_flag_works)
1193 return 0;
1194 }
1195
1196#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001197 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001198 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001199 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001200 if (handle == INVALID_HANDLE_VALUE) {
1201 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001202 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001203 return -1;
1204 }
1205
1206 if (inheritable)
1207 flags = HANDLE_FLAG_INHERIT;
1208 else
1209 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001210
1211 /* This check can be removed once support for Windows 7 ends. */
1212#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1213 GetFileType(handle) == FILE_TYPE_CHAR)
1214
1215 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1216 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001217 if (raise)
1218 PyErr_SetFromWindowsErr(0);
1219 return -1;
1220 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001221#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001222 return 0;
1223
Victor Stinnerdaf45552013-08-28 00:53:59 +02001224#else
Victor Stinner282124b2014-09-02 11:41:04 +02001225
1226#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001227 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001228 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001229 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1230 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001231 if (inheritable)
1232 request = FIONCLEX;
1233 else
1234 request = FIOCLEX;
1235 err = ioctl(fd, request, NULL);
1236 if (!err) {
1237 ioctl_works = 1;
1238 return 0;
1239 }
1240
Victor Stinner3116cc42016-05-19 16:46:18 +02001241 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001242 if (raise)
1243 PyErr_SetFromErrno(PyExc_OSError);
1244 return -1;
1245 }
1246 else {
1247 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1248 device". The ioctl is declared but not supported by the kernel.
1249 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001250 Illumos-based OS for example.
1251
1252 Issue #27057: When SELinux policy disallows ioctl it will fail
1253 with EACCES. While FIOCLEX is safe operation it may be
1254 unavailable because ioctl was denied altogether.
1255 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001256 ioctl_works = 0;
1257 }
1258 /* fallback to fcntl() if ioctl() does not work */
1259 }
1260#endif
1261
1262 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001263 flags = fcntl(fd, F_GETFD);
1264 if (flags < 0) {
1265 if (raise)
1266 PyErr_SetFromErrno(PyExc_OSError);
1267 return -1;
1268 }
1269
Victor Stinnera858bbd2016-04-17 16:51:52 +02001270 if (inheritable) {
1271 new_flags = flags & ~FD_CLOEXEC;
1272 }
1273 else {
1274 new_flags = flags | FD_CLOEXEC;
1275 }
1276
1277 if (new_flags == flags) {
1278 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1279 return 0;
1280 }
1281
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001282 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001283 if (res < 0) {
1284 if (raise)
1285 PyErr_SetFromErrno(PyExc_OSError);
1286 return -1;
1287 }
1288 return 0;
1289#endif
1290}
1291
/* Make the file descriptor non-inheritable, without raising an exception
   and without an atomic-flag hint.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, NULL);
}
1299
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the inheritable flag of the file
      descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set it to 0 and
      make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1321
/* Same as _Py_set_inheritable() but on error, set errno and
   don't raise an exception.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1330
Victor Stinnera555cfc2015-03-18 00:22:14 +01001331static int
1332_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001333{
1334 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001335 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001336#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001337 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001338#endif
1339
1340#ifdef MS_WINDOWS
1341 flags |= O_NOINHERIT;
1342#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001343 atomic_flag_works = &_Py_open_cloexec_works;
1344 flags |= O_CLOEXEC;
1345#else
1346 atomic_flag_works = NULL;
1347#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001348
Victor Stinnera555cfc2015-03-18 00:22:14 +01001349 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001350 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1351 if (pathname_obj == NULL) {
1352 return -1;
1353 }
1354 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1355 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001356 return -1;
1357 }
1358
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001359 do {
1360 Py_BEGIN_ALLOW_THREADS
1361 fd = open(pathname, flags);
1362 Py_END_ALLOW_THREADS
1363 } while (fd < 0
1364 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001365 if (async_err) {
1366 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001367 return -1;
1368 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001369 if (fd < 0) {
1370 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1371 Py_DECREF(pathname_obj);
1372 return -1;
1373 }
1374 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001375 }
1376 else {
1377 fd = open(pathname, flags);
1378 if (fd < 0)
1379 return -1;
1380 }
1381
1382#ifndef MS_WINDOWS
1383 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001384 close(fd);
1385 return -1;
1386 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001387#endif
1388
Victor Stinnerdaf45552013-08-28 00:53:59 +02001389 return fd;
1390}
1391
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1409
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Set errno and return -1 on error.
   No exception is raised.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1421
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   The "open" audit event is emitted first; return NULL if the audit hook
   fails.

   On non-Windows platforms, mode is converted with wcstombs() into a
   10-byte buffer: fail with errno=EINVAL if it cannot be converted or does
   not fit (terminator included).

   Return the FILE* on success, NULL on error. When fopen() or
   make_non_inheritable() fails, errno as set by that call is preserved
   across the cleanup done here (PyMem_RawFree(), fclose()).

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        return NULL;
    }
    f = fopen(cpath, cmode);
    {
        /* Releasing memory may modify errno: keep the value from fopen(). */
        int saved_errno = errno;
        PyMem_RawFree(cpath);
        errno = saved_errno;
    }
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL) {
        return NULL;
    }
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: report the original failure. */
        int saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1461
Victor Stinnere42ccd22015-03-18 01:39:23 +01001462/* Wrapper to fopen().
1463
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001464 The file descriptor is created non-inheritable.
1465
1466 If interrupted by a signal, fail with EINTR. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001467FILE*
1468_Py_fopen(const char *pathname, const char *mode)
1469{
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001470 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1471 if (pathname_obj == NULL) {
Steve Dowerb82e17e2019-05-23 08:45:22 -07001472 return NULL;
1473 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001474 if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1475 Py_DECREF(pathname_obj);
1476 return NULL;
1477 }
1478 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001479
Victor Stinnerdaf45552013-08-28 00:53:59 +02001480 FILE *f = fopen(pathname, mode);
1481 if (f == NULL)
1482 return NULL;
1483 if (make_non_inheritable(fileno(f)) < 0) {
1484 fclose(f);
1485 return NULL;
1486 }
1487 return f;
1488}
1489
1490/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001491 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001492
Victor Stinnere42ccd22015-03-18 01:39:23 +01001493 Return the new file object on success. Raise an exception and return NULL
1494 on error.
1495
1496 The file descriptor is created non-inheritable.
1497
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001498 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1499 except if the Python signal handler raises an exception.
1500
Victor Stinner6f4fae82015-04-01 18:34:32 +02001501 Release the GIL to call _wfopen() or fopen(). The caller must hold
1502 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001503FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001504_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001505{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001506 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001507 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001508#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001509 wchar_t wmode[10];
1510 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001511
Victor Stinnere42ccd22015-03-18 01:39:23 +01001512 assert(PyGILState_Check());
1513
Steve Dowerb82e17e2019-05-23 08:45:22 -07001514 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1515 return NULL;
1516 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001517 if (!PyUnicode_Check(path)) {
1518 PyErr_Format(PyExc_TypeError,
1519 "str file path expected under Windows, got %R",
1520 Py_TYPE(path));
1521 return NULL;
1522 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001523#if USE_UNICODE_WCHAR_CACHE
1524 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1525#else /* USE_UNICODE_WCHAR_CACHE */
1526 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1527#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001528 if (wpath == NULL)
1529 return NULL;
1530
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001531 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1532 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001533 if (usize == 0) {
1534 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001535#if !USE_UNICODE_WCHAR_CACHE
1536 PyMem_Free(wpath);
1537#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001538 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001539 }
Victor Stinner4e314432010-10-07 21:45:39 +00001540
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001541 do {
1542 Py_BEGIN_ALLOW_THREADS
1543 f = _wfopen(wpath, wmode);
1544 Py_END_ALLOW_THREADS
1545 } while (f == NULL
1546 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001547#if !USE_UNICODE_WCHAR_CACHE
1548 PyMem_Free(wpath);
1549#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001550#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001551 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001552 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001553
1554 assert(PyGILState_Check());
1555
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001556 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001557 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001558 path_bytes = PyBytes_AS_STRING(bytes);
1559
Steve Dowerb82e17e2019-05-23 08:45:22 -07001560 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001561 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001562 return NULL;
1563 }
1564
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001565 do {
1566 Py_BEGIN_ALLOW_THREADS
1567 f = fopen(path_bytes, mode);
1568 Py_END_ALLOW_THREADS
1569 } while (f == NULL
1570 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001571
Victor Stinner4e314432010-10-07 21:45:39 +00001572 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001573#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001574 if (async_err)
1575 return NULL;
1576
Victor Stinnere42ccd22015-03-18 01:39:23 +01001577 if (f == NULL) {
1578 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001579 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001580 }
1581
1582 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001583 fclose(f);
1584 return NULL;
1585 }
1586 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001587}
1588
Victor Stinner66aab0c2015-03-19 22:53:20 +01001589/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001590
1591 On success, return the number of read bytes, it can be lower than count.
1592 If the current file offset is at or past the end of file, no bytes are read,
1593 and read() returns zero.
1594
1595 On error, raise an exception, set errno and return -1.
1596
1597 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1598 If the Python signal handler raises an exception, the function returns -1
1599 (the syscall is not retried).
1600
1601 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001602Py_ssize_t
1603_Py_read(int fd, void *buf, size_t count)
1604{
1605 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001606 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001607 int async_err = 0;
1608
Victor Stinner8a1be612016-03-14 22:07:55 +01001609 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001610
Victor Stinner66aab0c2015-03-19 22:53:20 +01001611 /* _Py_read() must not be called with an exception set, otherwise the
1612 * caller may think that read() was interrupted by a signal and the signal
1613 * handler raised an exception. */
1614 assert(!PyErr_Occurred());
1615
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001616 if (count > _PY_READ_MAX) {
1617 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001618 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001619
Steve Dower8fc89802015-04-12 00:26:27 -04001620 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001621 do {
1622 Py_BEGIN_ALLOW_THREADS
1623 errno = 0;
1624#ifdef MS_WINDOWS
1625 n = read(fd, buf, (int)count);
1626#else
1627 n = read(fd, buf, count);
1628#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001629 /* save/restore errno because PyErr_CheckSignals()
1630 * and PyErr_SetFromErrno() can modify it */
1631 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001632 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001633 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001634 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001635 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001636
1637 if (async_err) {
1638 /* read() was interrupted by a signal (failed with EINTR)
1639 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001640 errno = err;
1641 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001642 return -1;
1643 }
1644 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001645 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001646 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001647 return -1;
1648 }
1649
1650 return n;
1651}
1652
Victor Stinner82c3e452015-04-01 18:34:45 +02001653static Py_ssize_t
1654_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001655{
1656 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001657 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001658 int async_err = 0;
1659
Steve Dower8fc89802015-04-12 00:26:27 -04001660 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001661#ifdef MS_WINDOWS
1662 if (count > 32767 && isatty(fd)) {
1663 /* Issue #11395: the Windows console returns an error (12: not
1664 enough space error) on writing into stdout if stdout mode is
1665 binary and the length is greater than 66,000 bytes (or less,
1666 depending on heap usage). */
1667 count = 32767;
1668 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001669#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001670 if (count > _PY_WRITE_MAX) {
1671 count = _PY_WRITE_MAX;
1672 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001673
Victor Stinner82c3e452015-04-01 18:34:45 +02001674 if (gil_held) {
1675 do {
1676 Py_BEGIN_ALLOW_THREADS
1677 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001678#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001679 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001680#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001681 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001682#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001683 /* save/restore errno because PyErr_CheckSignals()
1684 * and PyErr_SetFromErrno() can modify it */
1685 err = errno;
1686 Py_END_ALLOW_THREADS
1687 } while (n < 0 && err == EINTR &&
1688 !(async_err = PyErr_CheckSignals()));
1689 }
1690 else {
1691 do {
1692 errno = 0;
1693#ifdef MS_WINDOWS
1694 n = write(fd, buf, (int)count);
1695#else
1696 n = write(fd, buf, count);
1697#endif
1698 err = errno;
1699 } while (n < 0 && err == EINTR);
1700 }
Steve Dower8fc89802015-04-12 00:26:27 -04001701 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001702
1703 if (async_err) {
1704 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001705 and the Python signal handler raised an exception (if gil_held is
1706 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001707 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001708 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001709 return -1;
1710 }
1711 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001712 if (gil_held)
1713 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001714 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001715 return -1;
1716 }
1717
1718 return n;
1719}
1720
Victor Stinner82c3e452015-04-01 18:34:45 +02001721/* Write count bytes of buf into fd.
1722
1723 On success, return the number of written bytes, it can be lower than count
1724 including 0. On error, raise an exception, set errno and return -1.
1725
1726 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1727 If the Python signal handler raises an exception, the function returns -1
1728 (the syscall is not retried).
1729
1730 Release the GIL to call write(). The caller must hold the GIL. */
1731Py_ssize_t
1732_Py_write(int fd, const void *buf, size_t count)
1733{
Victor Stinner8a1be612016-03-14 22:07:55 +01001734 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001735
Victor Stinner82c3e452015-04-01 18:34:45 +02001736 /* _Py_write() must not be called with an exception set, otherwise the
1737 * caller may think that write() was interrupted by a signal and the signal
1738 * handler raised an exception. */
1739 assert(!PyErr_Occurred());
1740
1741 return _Py_write_impl(fd, buf, count, 1);
1742}
1743
1744/* Write count bytes of buf into fd.
1745 *
1746 * On success, return the number of written bytes, it can be lower than count
1747 * including 0. On error, set errno and return -1.
1748 *
1749 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1750 * without calling the Python signal handler. */
1751Py_ssize_t
1752_Py_write_noraise(int fd, const void *buf, size_t count)
1753{
1754 return _Py_write_impl(fd, buf, count, 0);
1755}
1756
#ifdef HAVE_READLINK

/* Read the value of a symbolic link.  The path is encoded to the locale
   encoding before calling readlink(), and the result is decoded from the
   locale encoding into 'buf'.

   Return the length of the decoded link target on success.

   Return -1 on encoding error, on readlink() error, if the internal buffer
   is too short, on decoding error, or if 'buf' is too short. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
{
    char target[MAXPATHLEN];
    size_t target_size = Py_ARRAY_LENGTH(target);

    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    Py_ssize_t nbytes = readlink(encoded, target, target_size);
    PyMem_RawFree(encoded);
    if (nbytes == -1) {
        return -1;
    }
    if ((size_t)nbytes == target_size) {
        /* the internal buffer is full: no room left for the NUL byte */
        errno = EINVAL;
        return -1;
    }
    target[nbytes] = '\0'; /* readlink() does not terminate the string */

    size_t wlen;
    wchar_t *decoded = Py_DecodeLocale(target, &wlen);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* 'buf' must have room for wlen characters plus the trailing NUL */
    int fits = (wlen < buflen);
    if (fits) {
        wcsncpy(buf, decoded, buflen);
    }
    PyMem_RawFree(decoded);
    if (!fits) {
        errno = EINVAL;
        return -1;
    }
    return (int)wlen;
}
#endif
1805
#ifdef HAVE_REALPATH

/* Compute the canonicalized absolute pathname of 'path'.  The path is
   encoded to the locale encoding before calling realpath(), and the result
   is decoded from the locale encoding into 'resolved_path'.

   Return 'resolved_path' on success.

   Return NULL on encoding error, realpath() error, decoding error
   or if 'resolved_path' is too short. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char resolved[MAXPATHLEN];

    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    char *ok = realpath(encoded, resolved);
    PyMem_RawFree(encoded);
    if (ok == NULL) {
        return NULL;
    }

    size_t wlen;
    wchar_t *decoded = Py_DecodeLocale(resolved, &wlen);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* 'resolved_path' must have room for wlen characters plus the
       trailing NUL */
    int fits = (wlen < resolved_path_len);
    if (fits) {
        wcsncpy(resolved_path, decoded, resolved_path_len);
    }
    PyMem_RawFree(decoded);
    if (!fits) {
        errno = EINVAL;
        return NULL;
    }
    return resolved_path;
}
#endif
1848
Victor Stinner3939c322019-06-25 15:02:43 +02001849
1850#ifndef MS_WINDOWS
1851int
1852_Py_isabs(const wchar_t *path)
1853{
1854 return (path[0] == SEP);
1855}
1856#endif
1857
1858
1859/* Get an absolute path.
1860 On error (ex: fail to get the current directory), return -1.
1861 On memory allocation failure, set *abspath_p to NULL and return 0.
1862 On success, return a newly allocated to *abspath_p to and return 0.
1863 The string must be freed by PyMem_RawFree(). */
1864int
1865_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1866{
1867#ifdef MS_WINDOWS
1868 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1869 DWORD result;
1870
1871 result = GetFullPathNameW(path,
1872 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1873 NULL);
1874 if (!result) {
1875 return -1;
1876 }
1877
1878 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1879 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1880 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1881 }
1882 else {
1883 woutbufp = NULL;
1884 }
1885 if (!woutbufp) {
1886 *abspath_p = NULL;
1887 return 0;
1888 }
1889
1890 result = GetFullPathNameW(path, result, woutbufp, NULL);
1891 if (!result) {
1892 PyMem_RawFree(woutbufp);
1893 return -1;
1894 }
1895 }
1896
1897 if (woutbufp != woutbuf) {
1898 *abspath_p = woutbufp;
1899 return 0;
1900 }
1901
1902 *abspath_p = _PyMem_RawWcsdup(woutbufp);
1903 return 0;
1904#else
1905 if (_Py_isabs(path)) {
1906 *abspath_p = _PyMem_RawWcsdup(path);
1907 return 0;
1908 }
1909
1910 wchar_t cwd[MAXPATHLEN + 1];
1911 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
1912 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
1913 /* unable to get the current directory */
1914 return -1;
1915 }
1916
1917 size_t cwd_len = wcslen(cwd);
1918 size_t path_len = wcslen(path);
1919 size_t len = cwd_len + 1 + path_len + 1;
1920 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1921 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
1922 }
1923 else {
1924 *abspath_p = NULL;
1925 }
1926 if (*abspath_p == NULL) {
1927 return 0;
1928 }
1929
1930 wchar_t *abspath = *abspath_p;
1931 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
1932 abspath += cwd_len;
1933
1934 *abspath = (wchar_t)SEP;
1935 abspath++;
1936
1937 memcpy(abspath, path, path_len * sizeof(wchar_t));
1938 abspath += path_len;
1939
1940 *abspath = 0;
1941 return 0;
1942#endif
1943}
1944
1945
Victor Stinnerfaddaed2019-03-19 02:58:14 +01001946/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001947 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001948
Victor Stinner1be0d112019-03-18 17:47:26 +01001949 Return NULL on getcwd() error, on decoding error, or if 'buf' is
1950 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001951wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01001952_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001953{
1954#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01001955 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1956 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00001957#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001958 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001959 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001960 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001961
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001962 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001963 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001964 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001965 if (wname == NULL)
1966 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01001967 /* wname must have space to store the trailing NUL character */
1968 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001969 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001970 return NULL;
1971 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001972 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001973 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001974 return buf;
1975#endif
1976}
1977
Victor Stinnerdaf45552013-08-28 00:53:59 +02001978/* Duplicate a file descriptor. The new file descriptor is created as
1979 non-inheritable. Return a new file descriptor on success, raise an OSError
1980 exception and return -1 on error.
1981
1982 The GIL is released to call dup(). The caller must hold the GIL. */
1983int
1984_Py_dup(int fd)
1985{
1986#ifdef MS_WINDOWS
1987 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001988#endif
1989
Victor Stinner8a1be612016-03-14 22:07:55 +01001990 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001991
Victor Stinnerdaf45552013-08-28 00:53:59 +02001992#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001993 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001994 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001995 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001996 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001997 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001998 return -1;
1999 }
2000
Victor Stinnerdaf45552013-08-28 00:53:59 +02002001 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002002 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002003 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002004 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002005 Py_END_ALLOW_THREADS
2006 if (fd < 0) {
2007 PyErr_SetFromErrno(PyExc_OSError);
2008 return -1;
2009 }
2010
Zackery Spytz28fca0c2019-06-17 01:17:14 -06002011 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2012 _Py_BEGIN_SUPPRESS_IPH
2013 close(fd);
2014 _Py_END_SUPPRESS_IPH
2015 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02002016 }
2017#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2018 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002019 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002020 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002021 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002022 Py_END_ALLOW_THREADS
2023 if (fd < 0) {
2024 PyErr_SetFromErrno(PyExc_OSError);
2025 return -1;
2026 }
2027
2028#else
2029 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002030 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002031 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002032 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002033 Py_END_ALLOW_THREADS
2034 if (fd < 0) {
2035 PyErr_SetFromErrno(PyExc_OSError);
2036 return -1;
2037 }
2038
2039 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002040 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002041 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002042 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002043 return -1;
2044 }
2045#endif
2046 return fd;
2047}
2048
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002049#ifndef MS_WINDOWS
2050/* Get the blocking mode of the file descriptor.
2051 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2052 raise an exception and return -1 on error. */
2053int
2054_Py_get_blocking(int fd)
2055{
Steve Dower8fc89802015-04-12 00:26:27 -04002056 int flags;
2057 _Py_BEGIN_SUPPRESS_IPH
2058 flags = fcntl(fd, F_GETFL, 0);
2059 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002060 if (flags < 0) {
2061 PyErr_SetFromErrno(PyExc_OSError);
2062 return -1;
2063 }
2064
2065 return !(flags & O_NONBLOCK);
2066}
2067
2068/* Set the blocking mode of the specified file descriptor.
2069
2070 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2071 otherwise.
2072
2073 Return 0 on success, raise an exception and return -1 on error. */
2074int
2075_Py_set_blocking(int fd, int blocking)
2076{
2077#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
2078 int arg = !blocking;
2079 if (ioctl(fd, FIONBIO, &arg) < 0)
2080 goto error;
2081#else
2082 int flags, res;
2083
Steve Dower8fc89802015-04-12 00:26:27 -04002084 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002085 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002086 if (flags >= 0) {
2087 if (blocking)
2088 flags = flags & (~O_NONBLOCK);
2089 else
2090 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002091
Steve Dower8fc89802015-04-12 00:26:27 -04002092 res = fcntl(fd, F_SETFL, flags);
2093 } else {
2094 res = -1;
2095 }
2096 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002097
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002098 if (res < 0)
2099 goto error;
2100#endif
2101 return 0;
2102
2103error:
2104 PyErr_SetFromErrno(PyExc_OSError);
2105 return -1;
2106}
2107#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002108
2109
2110int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002111_Py_GetLocaleconvNumeric(struct lconv *lc,
2112 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002113{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002114 assert(decimal_point != NULL);
2115 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002116
TIGirardif2312032020-10-20 08:39:52 -03002117#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002118 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002119 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002120 change_locale = 1;
2121 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002122 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002123 change_locale = 1;
2124 }
2125
2126 /* Keep a copy of the LC_CTYPE locale */
2127 char *oldloc = NULL, *loc = NULL;
2128 if (change_locale) {
2129 oldloc = setlocale(LC_CTYPE, NULL);
2130 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002131 PyErr_SetString(PyExc_RuntimeWarning,
2132 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002133 return -1;
2134 }
2135
2136 oldloc = _PyMem_Strdup(oldloc);
2137 if (!oldloc) {
2138 PyErr_NoMemory();
2139 return -1;
2140 }
2141
2142 loc = setlocale(LC_NUMERIC, NULL);
2143 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2144 loc = NULL;
2145 }
2146
2147 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002148 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002149 if LC_NUMERIC locale is different than LC_CTYPE locale and
2150 decimal_point and/or thousands_sep are non-ASCII or longer than
2151 1 byte */
2152 setlocale(LC_CTYPE, loc);
2153 }
2154 }
2155
TIGirardif2312032020-10-20 08:39:52 -03002156#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2157#else /* MS_WINDOWS */
2158/* Use _W_* fields of Windows strcut lconv */
2159#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2160#endif /* MS_WINDOWS */
2161
Victor Stinner02e6bf72018-11-20 16:20:16 +01002162 int res = -1;
2163
TIGirardif2312032020-10-20 08:39:52 -03002164 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002165 if (*decimal_point == NULL) {
2166 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002167 }
2168
TIGirardif2312032020-10-20 08:39:52 -03002169 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002170 if (*thousands_sep == NULL) {
2171 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002172 }
2173
2174 res = 0;
2175
Victor Stinner02e6bf72018-11-20 16:20:16 +01002176done:
TIGirardif2312032020-10-20 08:39:52 -03002177#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002178 if (loc != NULL) {
2179 setlocale(LC_CTYPE, oldloc);
2180 }
2181 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002182#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002183 return res;
TIGirardif2312032020-10-20 08:39:52 -03002184
2185#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002186}
Kyle Evans79925792020-10-13 15:04:44 -05002187
/* How _Py_closerange() picks the function used to close descriptors:
 * 1. close_range(2) is always preferred when available: for a contiguous
 *    range it beats fdwalk(3), which iterates over the entire fd space and
 *    simply does nothing for descriptors outside the range.
 * 2. Otherwise, closefrom(2) is tried next, when available, if the range
 *    extends up to sysconf(_SC_OPEN_MAX).
 * 2a. If the range does not extend up to sysconf(_SC_OPEN_MAX), fall back
 *     to fdwalk(3), which is more performant when the range contains any
 *     chunk of non-opened descriptors in the middle.
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
 */
#if defined(__FreeBSD__)
#  define USE_CLOSEFROM
#endif /* __FreeBSD__ */

#if defined(HAVE_FDWALK)
#  define USE_FDWALK
#endif /* HAVE_FDWALK */
2206
2207#ifdef USE_FDWALK
/* fdwalk() callback: close 'fd' if it lies in the half-open range
   [lo, hi), where 'lohi' points to an array of two ints {lo, hi}.

   Return 1 once 'fd' has reached 'hi', 0 otherwise.  Errors from close()
   are ignored. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int lo = bounds[0];
    const int hi = bounds[1];

    if (fd >= hi) {
        return 1;
    }
    if (fd >= lo) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2223#endif /* USE_FDWALK */
2224
2225/* Closes all file descriptors in [first, last], ignoring errors. */
2226void
2227_Py_closerange(int first, int last)
2228{
2229 first = Py_MAX(first, 0);
2230 _Py_BEGIN_SUPPRESS_IPH
2231#ifdef HAVE_CLOSE_RANGE
2232 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2233 /* Any errors encountered while closing file descriptors are ignored;
2234 * ENOSYS means no kernel support, though,
2235 * so we'll fallback to the other methods. */
2236 }
2237 else
2238#endif /* HAVE_CLOSE_RANGE */
2239#ifdef USE_CLOSEFROM
2240 if (last >= sysconf(_SC_OPEN_MAX)) {
2241 /* Any errors encountered while closing file descriptors are ignored */
2242 closefrom(first);
2243 }
2244 else
2245#endif /* USE_CLOSEFROM */
2246#ifdef USE_FDWALK
2247 {
2248 int lohi[2];
2249 lohi[0] = first;
2250 lohi[1] = last + 1;
2251 fdwalk(_fdwalk_close_func, lohi);
2252 }
2253#else
2254 {
2255 for (int i = first; i <= last; i++) {
2256 /* Ignore errors */
2257 (void)close(i);
2258 }
2259 }
2260#endif /* USE_FDWALK */
2261 _Py_END_SUPPRESS_IPH
2262}