blob: 8dc90fbe2b2e714d17ad773edd6a68439f89be31 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
21#ifdef HAVE_FCNTL_H
22#include <fcntl.h>
23#endif /* HAVE_FCNTL_H */
24
#ifdef O_CLOEXEC
/* Tri-state cache answering "does open() honour the O_CLOEXEC flag?":

     -1: not determined yet
      0: open() ignores O_CLOEXEC (ex: Linux kernel older than 2.6.23)
      1: open() supports O_CLOEXEC, close-on-exec is set

   Consulted by _Py_open(), _Py_open_noraise(), io.FileIO and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
36
Victor Stinner3d4226a2018-08-29 22:21:32 +020037
38static int
39get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
40{
41 switch (errors)
42 {
43 case _Py_ERROR_STRICT:
44 *surrogateescape = 0;
45 return 0;
46 case _Py_ERROR_SURROGATEESCAPE:
47 *surrogateescape = 1;
48 return 0;
49 default:
50 return -1;
51 }
52}
53
54
Brett Cannonefb00c02012-02-29 18:31:31 -050055PyObject *
56_Py_device_encoding(int fd)
57{
Steve Dower8fc89802015-04-12 00:26:27 -040058 int valid;
59 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070060 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040061 _Py_END_SUPPRESS_IPH
62 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050063 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040064
Victor Stinner14b9b112013-06-25 00:37:25 +020065#if defined(MS_WINDOWS)
Victor Stinner35297182020-11-04 11:20:10 +010066 UINT cp;
Brett Cannonefb00c02012-02-29 18:31:31 -050067 if (fd == 0)
68 cp = GetConsoleCP();
69 else if (fd == 1 || fd == 2)
70 cp = GetConsoleOutputCP();
71 else
72 cp = 0;
73 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
74 has no console */
Victor Stinner35297182020-11-04 11:20:10 +010075 if (cp == 0) {
76 Py_RETURN_NONE;
Brett Cannonefb00c02012-02-29 18:31:31 -050077 }
Victor Stinner35297182020-11-04 11:20:10 +010078
79 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
80#else
81 return _Py_GetLocaleEncodingObject();
Brett Cannonefb00c02012-02-29 18:31:31 -050082#endif
Brett Cannonefb00c02012-02-29 18:31:31 -050083}
84
Victor Stinnere2510952019-05-02 11:28:57 -040085#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +010086
87#define USE_FORCE_ASCII
88
Victor Stinnerd45c7f82012-12-04 01:34:47 +010089extern int _Py_normalize_encoding(const char *, char *, size_t);
90
/* Workaround for a locale encoding issue seen on FreeBSD and OpenIndiana
   with the C locale and the POSIX locale: nl_langinfo(CODESET) announces an
   alias of the ASCII encoding, whereas mbstowcs() and wcstombs() actually
   use the ISO-8859-1 encoding. The problem is that os.fsencode() and
   os.fsdecode() use the locale.getpreferredencoding() codec. For example, if
   command line arguments are decoded by mbstowcs() and encoded back by
   os.fsencode(), we get a UnicodeEncodeError instead of retrieving the
   original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias of ASCII),
   and at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. ASCII is also
   forced in this case.

   Values of force_ascii:

      1: the workaround is used: Py_EncodeLocale() uses encode_ascii() and
         Py_DecodeLocale() uses decode_ascii()
      0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
         Py_DecodeLocale() uses mbstowcs()
     -1: unknown, check_force_ascii() must be called to get the value
*/
static int force_ascii = -1;
119
/* Decide whether the ASCII encoding must be forced instead of trusting the
   locale encoding announced by the C library.

   Return 1 if ASCII must be forced, 0 otherwise. Errors (the LC_CTYPE locale
   or the codeset cannot be retrieved or normalized) also return 1.

   Every byte handed to mbstowcs() is stored in a NUL-terminated buffer:
   mbstowcs() expects a multibyte *string* and may examine the bytes following
   a lead byte, so a bare 1-byte buffer could be read out of bounds. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* NUL-terminated probe string holding the single byte 0xA7 */
        unsigned char probe[2] = {0xA7, '\0'};
        wchar_t wch;
        size_t res;

        res = mbstowcs(&wch, (const char *)probe, 1);
        if (res != (size_t)-1 && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* one non-ASCII byte followed by the terminating NUL */
        char probe[2];
        wchar_t wch[1];
        size_t res;

        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        res = mbstowcs(wch, probe, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
219
Victor Stinnerd500e532018-08-28 17:27:36 +0200220
221int
222_Py_GetForceASCII(void)
223{
224 if (force_ascii == -1) {
225 force_ascii = check_force_ascii();
226 }
227 return force_ascii;
228}
229
230
Victor Stinner353933e2018-11-23 13:08:26 +0100231void
232_Py_ResetForceASCII(void)
233{
234 force_ascii = -1;
235}
236
237
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100238static int
239encode_ascii(const wchar_t *text, char **str,
240 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200241 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100242{
243 char *result = NULL, *out;
244 size_t len, i;
245 wchar_t ch;
246
Victor Stinner3d4226a2018-08-29 22:21:32 +0200247 int surrogateescape;
248 if (get_surrogateescape(errors, &surrogateescape) < 0) {
249 return -3;
250 }
251
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100252 len = wcslen(text);
253
Victor Stinner9bee3292017-12-21 16:49:13 +0100254 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100255 if (raw_malloc) {
256 result = PyMem_RawMalloc(len + 1);
257 }
258 else {
259 result = PyMem_Malloc(len + 1);
260 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100261 if (result == NULL) {
262 return -1;
263 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100264
265 out = result;
266 for (i=0; i<len; i++) {
267 ch = text[i];
268
269 if (ch <= 0x7f) {
270 /* ASCII character */
271 *out++ = (char)ch;
272 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100273 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100274 /* UTF-8b surrogate */
275 *out++ = (char)(ch - 0xdc00);
276 }
277 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100278 if (raw_malloc) {
279 PyMem_RawFree(result);
280 }
281 else {
282 PyMem_Free(result);
283 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100284 if (error_pos != NULL) {
285 *error_pos = i;
286 }
287 if (reason) {
288 *reason = "encoding error";
289 }
290 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100291 }
292 }
293 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100294 *str = result;
295 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100296}
Victor Stinnerd500e532018-08-28 17:27:36 +0200297#else
/* The force-ASCII workaround is compiled out in this configuration:
   always report that it is not used. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100303
/* The force-ASCII workaround is compiled out in this configuration:
   there is no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
Victor Stinnere2510952019-05-02 11:28:57 -0400309#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100310
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100311
312#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
313static int
314decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200315 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100316{
317 wchar_t *res;
318 unsigned char *in;
319 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600320 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100321
Victor Stinner3d4226a2018-08-29 22:21:32 +0200322 int surrogateescape;
323 if (get_surrogateescape(errors, &surrogateescape) < 0) {
324 return -3;
325 }
326
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100327 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
328 return -1;
329 }
330 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
331 if (!res) {
332 return -1;
333 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100334
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100335 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100336 for (in = (unsigned char*)arg; *in; in++) {
337 unsigned char ch = *in;
338 if (ch < 128) {
339 *out++ = ch;
340 }
341 else {
342 if (!surrogateescape) {
343 PyMem_RawFree(res);
344 if (wlen) {
345 *wlen = in - (unsigned char*)arg;
346 }
347 if (reason) {
348 *reason = "decoding error";
349 }
350 return -2;
351 }
352 *out++ = 0xdc00 + ch;
353 }
354 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100355 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100356
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100357 if (wlen != NULL) {
358 *wlen = out - res;
359 }
360 *wstr = res;
361 return 0;
362}
363#endif /* !HAVE_MBRTOWC */
364
365static int
366decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200367 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000368{
369 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100370 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000371 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200372#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000373 unsigned char *in;
374 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000375 mbstate_t mbs;
376#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100377
Victor Stinner3d4226a2018-08-29 22:21:32 +0200378 int surrogateescape;
379 if (get_surrogateescape(errors, &surrogateescape) < 0) {
380 return -3;
381 }
382
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100383#ifdef HAVE_BROKEN_MBSTOWCS
384 /* Some platforms have a broken implementation of
385 * mbstowcs which does not count the characters that
386 * would result from conversion. Use an upper bound.
387 */
388 argsize = strlen(arg);
389#else
390 argsize = mbstowcs(NULL, arg, 0);
391#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000392 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100393 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
394 return -1;
395 }
396 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
397 if (!res) {
398 return -1;
399 }
400
401 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000402 if (count != (size_t)-1) {
403 wchar_t *tmp;
404 /* Only use the result if it contains no
405 surrogate characters. */
406 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100407 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000408 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000409 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100410 if (wlen != NULL) {
411 *wlen = count;
412 }
413 *wstr = res;
414 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000415 }
Victor Stinner4e314432010-10-07 21:45:39 +0000416 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200417 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000418 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100419
Victor Stinner4e314432010-10-07 21:45:39 +0000420 /* Conversion failed. Fall back to escaping with surrogateescape. */
421#ifdef HAVE_MBRTOWC
422 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
423
424 /* Overallocate; as multi-byte characters are in the argument, the
425 actual output could use less memory. */
426 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100427 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
428 return -1;
429 }
430 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
431 if (!res) {
432 return -1;
433 }
434
Victor Stinner4e314432010-10-07 21:45:39 +0000435 in = (unsigned char*)arg;
436 out = res;
437 memset(&mbs, 0, sizeof mbs);
438 while (argsize) {
439 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100440 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000441 /* Reached end of string; null char stored. */
442 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100443 }
444
Victor Stinner4e314432010-10-07 21:45:39 +0000445 if (converted == (size_t)-2) {
446 /* Incomplete character. This should never happen,
447 since we provide everything that we have -
448 unless there is a bug in the C library, or I
449 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100450 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000451 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100452
Victor Stinner4e314432010-10-07 21:45:39 +0000453 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100454 if (!surrogateescape) {
455 goto decode_error;
456 }
457
Victor Stinner4e314432010-10-07 21:45:39 +0000458 /* Conversion error. Escape as UTF-8b, and start over
459 in the initial shift state. */
460 *out++ = 0xdc00 + *in++;
461 argsize--;
462 memset(&mbs, 0, sizeof mbs);
463 continue;
464 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100465
Victor Stinner76df43d2012-10-30 01:42:39 +0100466 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100467 if (!surrogateescape) {
468 goto decode_error;
469 }
470
Victor Stinner4e314432010-10-07 21:45:39 +0000471 /* Surrogate character. Escape the original
472 byte sequence with surrogateescape. */
473 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100474 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000475 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100476 }
Victor Stinner4e314432010-10-07 21:45:39 +0000477 continue;
478 }
479 /* successfully converted some bytes */
480 in += converted;
481 argsize -= converted;
482 out++;
483 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100484 if (wlen != NULL) {
485 *wlen = out - res;
486 }
487 *wstr = res;
488 return 0;
489
490decode_error:
491 PyMem_RawFree(res);
492 if (wlen) {
493 *wlen = in - (unsigned char*)arg;
494 }
495 if (reason) {
496 *reason = "decoding error";
497 }
498 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100499#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000500 /* Cannot use C locale for escaping; manually escape as if charset
501 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
502 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200503 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100504#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100505}
506
507
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100508/* Decode a byte string from the locale encoding.
509
510 Use the strict error handler if 'surrogateescape' is zero. Use the
511 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
512 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
513 can be decoded as a surrogate character, escape the bytes using the
514 surrogateescape error handler instead of decoding them.
515
Ville Skyttä61f82e02018-04-20 23:08:45 +0300516 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100517 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
518 the number of wide characters excluding the null character into *wlen.
519
520 On memory allocation failure, return -1.
521
522 On decoding error, return -2. If wlen is not NULL, write the start of
523 invalid byte sequence in the input string into *wlen. If reason is not NULL,
524 write the decoding error message into *reason.
525
Victor Stinner3d4226a2018-08-29 22:21:32 +0200526 Return -3 if the error handler 'errors' is not supported.
527
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100528 Use the Py_EncodeLocaleEx() function to encode the character string back to
529 a byte string. */
530int
531_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
532 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200533 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100534{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100535 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400536#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100537 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200538 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100539#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200540 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100541#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100542 }
543
Victor Stinnere2510952019-05-02 11:28:57 -0400544#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100545 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200546 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100547#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200548 int use_utf8 = (Py_UTF8Mode == 1);
549#ifdef MS_WINDOWS
550 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
551#endif
552 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200553 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
554 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100555 }
556
557#ifdef USE_FORCE_ASCII
558 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100559 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100560 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100561
562 if (force_ascii) {
563 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200564 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100565 }
566#endif
567
Victor Stinner3d4226a2018-08-29 22:21:32 +0200568 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400569#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100570}
571
572
Victor Stinner91106cd2017-12-13 12:29:09 +0100573/* Decode a byte string from the locale encoding with the
574 surrogateescape error handler: undecodable bytes are decoded as characters
575 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
576 character, escape the bytes using the surrogateescape error handler instead
577 of decoding them.
578
579 Return a pointer to a newly allocated wide character string, use
580 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
581 wide characters excluding the null character into *size
582
583 Return NULL on decoding error or memory allocation error. If *size* is not
584 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
585 decoding error.
586
587 Decoding errors should never happen, unless there is a bug in the C
588 library.
589
590 Use the Py_EncodeLocale() function to encode the character string back to a
591 byte string. */
592wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100593Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100594{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100595 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200596 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
597 NULL, 0,
598 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100599 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200600 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100601 if (wlen != NULL) {
602 *wlen = (size_t)res;
603 }
604 return NULL;
605 }
606 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100607}
Victor Stinner91106cd2017-12-13 12:29:09 +0100608
Victor Stinner91106cd2017-12-13 12:29:09 +0100609
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100610static int
611encode_current_locale(const wchar_t *text, char **str,
612 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200613 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100614{
Victor Stinner4e314432010-10-07 21:45:39 +0000615 const size_t len = wcslen(text);
616 char *result = NULL, *bytes = NULL;
617 size_t i, size, converted;
618 wchar_t c, buf[2];
619
Victor Stinner3d4226a2018-08-29 22:21:32 +0200620 int surrogateescape;
621 if (get_surrogateescape(errors, &surrogateescape) < 0) {
622 return -3;
623 }
624
Victor Stinner4e314432010-10-07 21:45:39 +0000625 /* The function works in two steps:
626 1. compute the length of the output buffer in bytes (size)
627 2. outputs the bytes */
628 size = 0;
629 buf[1] = 0;
630 while (1) {
631 for (i=0; i < len; i++) {
632 c = text[i];
633 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100634 if (!surrogateescape) {
635 goto encode_error;
636 }
Victor Stinner4e314432010-10-07 21:45:39 +0000637 /* UTF-8b surrogate */
638 if (bytes != NULL) {
639 *bytes++ = c - 0xdc00;
640 size--;
641 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100642 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000643 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100644 }
Victor Stinner4e314432010-10-07 21:45:39 +0000645 continue;
646 }
647 else {
648 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100649 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000650 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100651 }
652 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000653 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100654 }
Victor Stinner4e314432010-10-07 21:45:39 +0000655 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100656 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000657 }
658 if (bytes != NULL) {
659 bytes += converted;
660 size -= converted;
661 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100662 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000663 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100664 }
Victor Stinner4e314432010-10-07 21:45:39 +0000665 }
666 }
667 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100668 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000669 break;
670 }
671
672 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100673 if (raw_malloc) {
674 result = PyMem_RawMalloc(size);
675 }
676 else {
677 result = PyMem_Malloc(size);
678 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100679 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100680 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100681 }
Victor Stinner4e314432010-10-07 21:45:39 +0000682 bytes = result;
683 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100684 *str = result;
685 return 0;
686
687encode_error:
688 if (raw_malloc) {
689 PyMem_RawFree(result);
690 }
691 else {
692 PyMem_Free(result);
693 }
694 if (error_pos != NULL) {
695 *error_pos = i;
696 }
697 if (reason) {
698 *reason = "encoding error";
699 }
700 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100701}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100702
Victor Stinner3d4226a2018-08-29 22:21:32 +0200703
704/* Encode a string to the locale encoding.
705
706 Parameters:
707
708 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
709 of PyMem_Malloc().
710 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
711 Python filesystem encoding.
712 * errors: error handler like "strict" or "surrogateescape".
713
714 Return value:
715
716 0: success, *str is set to a newly allocated decoded string.
717 -1: memory allocation failure
718 -2: encoding error, set *error_pos and *reason (if set).
719 -3: the error handler 'errors' is not supported.
720 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100721static int
722encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
723 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200724 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100725{
726 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400727#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100728 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200729 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100730#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100731 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200732 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100733#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100734 }
735
Victor Stinnere2510952019-05-02 11:28:57 -0400736#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100737 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200738 raw_malloc, errors);
739#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200740 int use_utf8 = (Py_UTF8Mode == 1);
741#ifdef MS_WINDOWS
742 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
743#endif
744 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100745 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200746 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100747 }
748
749#ifdef USE_FORCE_ASCII
750 if (force_ascii == -1) {
751 force_ascii = check_force_ascii();
752 }
753
754 if (force_ascii) {
755 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200756 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100757 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100758#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100759
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100760 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200761 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400762#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100763}
764
Victor Stinner9dd76202017-12-21 16:20:32 +0100765static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100766encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100767 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100768{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100769 char *str;
770 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200771 raw_malloc, current_locale,
772 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100773 if (res != -2 && error_pos) {
774 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100775 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100776 if (res != 0) {
777 return NULL;
778 }
779 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100780}
781
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100799
Victor Stinner91106cd2017-12-13 12:29:09 +0100800
/* Variant of Py_EncodeLocale() whose result must be released with
   PyMem_RawFree() rather than PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
808
809
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100810int
811_Py_EncodeLocaleEx(const wchar_t *text, char **str,
812 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200813 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100814{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100815 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200816 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000817}
818
Victor Stinner6672d0c2010-10-07 22:53:43 +0000819
Victor Stinner82458b62020-11-01 20:59:35 +0100820// Get the current locale encoding name:
821//
822// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
823// - Return "UTF-8" if the UTF-8 Mode is enabled
824// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100825// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100826// - Otherwise, return nl_langinfo(CODESET).
827//
Victor Stinnere662c392020-11-01 23:07:23 +0100828// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100829//
Victor Stinner710e8262020-10-31 01:02:09 +0100830// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100831wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100832_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100833{
834#ifdef _Py_FORCE_UTF8_LOCALE
835 // On Android langinfo.h and CODESET are missing,
836 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100837 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100838#else
839 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
840 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100841 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100842 }
843
Victor Stinner82458b62020-11-01 20:59:35 +0100844#ifdef MS_WINDOWS
845 wchar_t encoding[23];
846 unsigned int ansi_codepage = GetACP();
847 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
848 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
849 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100850#else
851 const char *encoding = nl_langinfo(CODESET);
852 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100853 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
854 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100855 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100856 }
Victor Stinner710e8262020-10-31 01:02:09 +0100857
Victor Stinner82458b62020-11-01 20:59:35 +0100858 wchar_t *wstr;
859 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100860 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100861 if (res < 0) {
862 return NULL;
863 }
864 return wstr;
865#endif // !MS_WINDOWS
866
867#endif // !_Py_FORCE_UTF8_LOCALE
868}
869
870
871PyObject *
872_Py_GetLocaleEncodingObject(void)
873{
Victor Stinnere662c392020-11-01 23:07:23 +0100874 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100875 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100876 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100877 return NULL;
878 }
879
880 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
881 PyMem_RawFree(encoding);
882 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100883}
884
885
Steve Dowerf2f373f2015-02-21 08:44:05 -0800886#ifdef MS_WINDOWS
887static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
888
889static void
890FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
891{
892 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
893 /* Cannot simply cast and dereference in_ptr,
894 since it might not be aligned properly */
895 __int64 in;
896 memcpy(&in, in_ptr, sizeof(in));
897 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
898 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
899}
900
901void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800902_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800903{
904 /* XXX endianness */
905 __int64 out;
906 out = time_in + secs_between_epochs;
907 out = out * 10000000 + nsec_in / 100;
908 memcpy(out_ptr, &out, sizeof(out));
909}
910
911/* Below, we *know* that ugo+r is 0444 */
912#if _S_IREAD != 0400
913#error Unsupported C library
914#endif
915static int
916attributes_to_mode(DWORD attr)
917{
918 int m = 0;
919 if (attr & FILE_ATTRIBUTE_DIRECTORY)
920 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
921 else
922 m |= _S_IFREG;
923 if (attr & FILE_ATTRIBUTE_READONLY)
924 m |= 0444;
925 else
926 m |= 0666;
927 return m;
928}
929
Steve Dowerbf1f3762015-02-21 15:26:02 -0800930void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200931_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
932 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800933{
934 memset(result, 0, sizeof(*result));
935 result->st_mode = attributes_to_mode(info->dwFileAttributes);
936 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
937 result->st_dev = info->dwVolumeSerialNumber;
938 result->st_rdev = result->st_dev;
939 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
940 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
941 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
942 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100943 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -0700944 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
945 open other name surrogate reparse points without traversing them. To
946 detect/handle these, check st_file_attributes and st_reparse_tag. */
947 result->st_reparse_tag = reparse_tag;
948 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
949 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -0800950 /* first clear the S_IFMT bits */
951 result->st_mode ^= (result->st_mode & S_IFMT);
952 /* now set the bits that make this a symlink */
953 result->st_mode |= S_IFLNK;
954 }
955 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800956}
957#endif
958
/* Return information about the file referred to by fd.

   On POSIX, this is a plain fstat() call.

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   instead because they support files larger than 2 GiB. fstat() may fail
   with EOVERFLOW on files larger than 2 GiB because the file size type is a
   signed 32-bit integer: see issue #23152.

   On success, fill status and return 0. On error, return nonzero: on
   Windows the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifndef MS_WINDOWS
    return fstat(fd, status);
#else
    HANDLE h;

    _Py_BEGIN_SUPPRESS_IPH
    h = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Only disk files carry full information; for any other type report at
       most the file type in st_mode and stop here. */
    switch (type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001024
Victor Stinnere134a7f2015-03-30 10:09:31 +02001025/* Return information about a file.
1026
1027 On POSIX, use fstat().
1028
1029 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001030 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1031 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001032 #23152.
1033
1034 Raise an exception and return -1 on error. On Windows, set the last Windows
1035 error on error. On POSIX, set errno on error. Fill status and return 0 on
1036 success.
1037
Victor Stinner6f4fae82015-04-01 18:34:32 +02001038 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1039 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001040int
1041_Py_fstat(int fd, struct _Py_stat_struct *status)
1042{
1043 int res;
1044
Victor Stinner8a1be612016-03-14 22:07:55 +01001045 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001046
Victor Stinnere134a7f2015-03-30 10:09:31 +02001047 Py_BEGIN_ALLOW_THREADS
1048 res = _Py_fstat_noraise(fd, status);
1049 Py_END_ALLOW_THREADS
1050
1051 if (res != 0) {
1052#ifdef MS_WINDOWS
1053 PyErr_SetFromWindowsErr(0);
1054#else
1055 PyErr_SetFromErrno(PyExc_OSError);
1056#endif
1057 return -1;
1058 }
1059 return 0;
1060}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001061
Victor Stinner6672d0c2010-10-07 22:53:43 +00001062/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1063 call stat() otherwise. Only fill st_mode attribute on Windows.
1064
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001065 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1066 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001067
1068int
Victor Stinnera4a75952010-10-07 22:23:10 +00001069_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001070{
1071#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001072 int err;
1073 struct _stat wstatbuf;
1074
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001075#if USE_UNICODE_WCHAR_CACHE
1076 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1077#else /* USE_UNICODE_WCHAR_CACHE */
1078 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1079#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001080 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001081 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001082
Victor Stinneree587ea2011-11-17 00:51:38 +01001083 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001084 if (!err)
1085 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001086#if !USE_UNICODE_WCHAR_CACHE
1087 PyMem_Free(wpath);
1088#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001089 return err;
1090#else
1091 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001092 PyObject *bytes;
1093 char *cpath;
1094
1095 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001096 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001097 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001098
1099 /* check for embedded null bytes */
1100 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1101 Py_DECREF(bytes);
1102 return -2;
1103 }
1104
1105 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001106 Py_DECREF(bytes);
1107 return ret;
1108#endif
1109}
1110
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001111
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001112/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001113static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001114get_inheritable(int fd, int raise)
1115{
1116#ifdef MS_WINDOWS
1117 HANDLE handle;
1118 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001119
Steve Dower8fc89802015-04-12 00:26:27 -04001120 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001121 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001122 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001123 if (handle == INVALID_HANDLE_VALUE) {
1124 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001125 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001126 return -1;
1127 }
1128
1129 if (!GetHandleInformation(handle, &flags)) {
1130 if (raise)
1131 PyErr_SetFromWindowsErr(0);
1132 return -1;
1133 }
1134
1135 return (flags & HANDLE_FLAG_INHERIT);
1136#else
1137 int flags;
1138
1139 flags = fcntl(fd, F_GETFD, 0);
1140 if (flags == -1) {
1141 if (raise)
1142 PyErr_SetFromErrno(PyExc_OSError);
1143 return -1;
1144 }
1145 return !(flags & FD_CLOEXEC);
1146#endif
1147}
1148
/* Get the inheritable flag of the specified file descriptor.
   Return a nonzero value if the file descriptor can be inherited (1 on
   POSIX), 0 if it cannot; raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise_exc = 1;

    return get_inheritable(fd, raise_exc);
}
1157
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001158
1159/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001160static int
1161set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1162{
1163#ifdef MS_WINDOWS
1164 HANDLE handle;
1165 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001166#else
1167#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1168 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001169 int request;
1170 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001171#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001172 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001173 int res;
1174#endif
1175
1176 /* atomic_flag_works can only be used to make the file descriptor
1177 non-inheritable */
1178 assert(!(atomic_flag_works != NULL && inheritable));
1179
1180 if (atomic_flag_works != NULL && !inheritable) {
1181 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001182 int isInheritable = get_inheritable(fd, raise);
1183 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001184 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001185 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001186 }
1187
1188 if (*atomic_flag_works)
1189 return 0;
1190 }
1191
1192#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001193 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001194 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001195 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001196 if (handle == INVALID_HANDLE_VALUE) {
1197 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001198 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001199 return -1;
1200 }
1201
1202 if (inheritable)
1203 flags = HANDLE_FLAG_INHERIT;
1204 else
1205 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001206
1207 /* This check can be removed once support for Windows 7 ends. */
1208#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1209 GetFileType(handle) == FILE_TYPE_CHAR)
1210
1211 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1212 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001213 if (raise)
1214 PyErr_SetFromWindowsErr(0);
1215 return -1;
1216 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001217#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001218 return 0;
1219
Victor Stinnerdaf45552013-08-28 00:53:59 +02001220#else
Victor Stinner282124b2014-09-02 11:41:04 +02001221
1222#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001223 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001224 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001225 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1226 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001227 if (inheritable)
1228 request = FIONCLEX;
1229 else
1230 request = FIOCLEX;
1231 err = ioctl(fd, request, NULL);
1232 if (!err) {
1233 ioctl_works = 1;
1234 return 0;
1235 }
1236
Victor Stinner3116cc42016-05-19 16:46:18 +02001237 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001238 if (raise)
1239 PyErr_SetFromErrno(PyExc_OSError);
1240 return -1;
1241 }
1242 else {
1243 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1244 device". The ioctl is declared but not supported by the kernel.
1245 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001246 Illumos-based OS for example.
1247
1248 Issue #27057: When SELinux policy disallows ioctl it will fail
1249 with EACCES. While FIOCLEX is safe operation it may be
1250 unavailable because ioctl was denied altogether.
1251 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001252 ioctl_works = 0;
1253 }
1254 /* fallback to fcntl() if ioctl() does not work */
1255 }
1256#endif
1257
1258 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001259 flags = fcntl(fd, F_GETFD);
1260 if (flags < 0) {
1261 if (raise)
1262 PyErr_SetFromErrno(PyExc_OSError);
1263 return -1;
1264 }
1265
Victor Stinnera858bbd2016-04-17 16:51:52 +02001266 if (inheritable) {
1267 new_flags = flags & ~FD_CLOEXEC;
1268 }
1269 else {
1270 new_flags = flags | FD_CLOEXEC;
1271 }
1272
1273 if (new_flags == flags) {
1274 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1275 return 0;
1276 }
1277
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001278 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001279 if (res < 0) {
1280 if (raise)
1281 PyErr_SetFromErrno(PyExc_OSError);
1282 return -1;
1283 }
1284 return 0;
1285#endif
1286}
1287
/* Make the file descriptor non-inheritable without raising an exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_exc = 0;

    return set_inheritable(fd, inheritable, raise_exc, NULL);
}
1295
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   atomic_flag_works, when not NULL, points to a cached tri-state telling
   whether the flag used to create the descriptor already made it
   non-inheritable:

   * *atomic_flag_works==-1: unknown. The inheritable flag of fd is read:
     if fd is already non-inheritable, *atomic_flag_works is set to 1 and
     nothing else is done; otherwise it is set to 0 and fd is made
     non-inheritable.
   * *atomic_flag_works==1: do nothing.
   * *atomic_flag_works==0: make fd non-inheritable.

   Pass NULL as atomic_flag_works if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_exc = 1;

    return set_inheritable(fd, inheritable, raise_exc, atomic_flag_works);
}
1317
/* Same as _Py_set_inheritable(), except that on error errno is set and no
   exception is raised.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_exc = 0;

    return set_inheritable(fd, inheritable, raise_exc, atomic_flag_works);
}
1326
Victor Stinnera555cfc2015-03-18 00:22:14 +01001327static int
1328_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001329{
1330 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001331 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001332#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001333 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001334#endif
1335
1336#ifdef MS_WINDOWS
1337 flags |= O_NOINHERIT;
1338#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001339 atomic_flag_works = &_Py_open_cloexec_works;
1340 flags |= O_CLOEXEC;
1341#else
1342 atomic_flag_works = NULL;
1343#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001344
Victor Stinnera555cfc2015-03-18 00:22:14 +01001345 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001346 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1347 if (pathname_obj == NULL) {
1348 return -1;
1349 }
1350 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1351 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001352 return -1;
1353 }
1354
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001355 do {
1356 Py_BEGIN_ALLOW_THREADS
1357 fd = open(pathname, flags);
1358 Py_END_ALLOW_THREADS
1359 } while (fd < 0
1360 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001361 if (async_err) {
1362 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001363 return -1;
1364 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001365 if (fd < 0) {
1366 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1367 Py_DECREF(pathname_obj);
1368 return -1;
1369 }
1370 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001371 }
1372 else {
1373 fd = open(pathname, flags);
1374 if (fd < 0)
1375 return -1;
1376 }
1377
1378#ifndef MS_WINDOWS
1379 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001380 close(fd);
1381 return -1;
1382 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001383#endif
1384
Victor Stinnerdaf45552013-08-28 00:53:59 +02001385 return fd;
1386}
1387
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1405
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Set errno and return -1 on error;
   no exception is raised.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1417
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   An "open" audit event is emitted first; NULL is returned if the audit
   hook fails.

   Return the new FILE* on success, NULL on error. When fopen() / _wfopen()
   or making the descriptor non-inheritable fails, errno is left as set by
   the failing call: it is saved and restored around the cleanup calls
   (PyMem_RawFree(), fclose()) which could otherwise overwrite it.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
    int saved_errno;

    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;

    r = wcstombs(cmode, mode, Py_ARRAY_LENGTH(cmode));
    /* r == size of cmode means the result was truncated and is not
       NUL-terminated */
    if (r == (size_t)-1 || r >= Py_ARRAY_LENGTH(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        return NULL;
    }
    f = fopen(cpath, cmode);
    /* Releasing memory may modify errno: keep the value set by fopen() */
    saved_errno = errno;
    PyMem_RawFree(cpath);
    errno = saved_errno;
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL) {
        return NULL;
    }
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may modify errno: report the original failure */
        saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1457
Victor Stinnerdaf45552013-08-28 00:53:59 +02001458
1459/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001460 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001461
Victor Stinnere42ccd22015-03-18 01:39:23 +01001462 Return the new file object on success. Raise an exception and return NULL
1463 on error.
1464
1465 The file descriptor is created non-inheritable.
1466
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001467 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1468 except if the Python signal handler raises an exception.
1469
Victor Stinner6f4fae82015-04-01 18:34:32 +02001470 Release the GIL to call _wfopen() or fopen(). The caller must hold
1471 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001472FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001473_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001474{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001475 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001476 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001477#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001478 wchar_t wmode[10];
1479 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001480
Victor Stinnere42ccd22015-03-18 01:39:23 +01001481 assert(PyGILState_Check());
1482
Steve Dowerb82e17e2019-05-23 08:45:22 -07001483 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1484 return NULL;
1485 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001486 if (!PyUnicode_Check(path)) {
1487 PyErr_Format(PyExc_TypeError,
1488 "str file path expected under Windows, got %R",
1489 Py_TYPE(path));
1490 return NULL;
1491 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001492#if USE_UNICODE_WCHAR_CACHE
1493 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1494#else /* USE_UNICODE_WCHAR_CACHE */
1495 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1496#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001497 if (wpath == NULL)
1498 return NULL;
1499
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001500 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1501 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001502 if (usize == 0) {
1503 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001504#if !USE_UNICODE_WCHAR_CACHE
1505 PyMem_Free(wpath);
1506#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001507 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001508 }
Victor Stinner4e314432010-10-07 21:45:39 +00001509
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001510 do {
1511 Py_BEGIN_ALLOW_THREADS
1512 f = _wfopen(wpath, wmode);
1513 Py_END_ALLOW_THREADS
1514 } while (f == NULL
1515 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001516#if !USE_UNICODE_WCHAR_CACHE
1517 PyMem_Free(wpath);
1518#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001519#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001520 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001521 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001522
1523 assert(PyGILState_Check());
1524
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001525 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001526 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001527 path_bytes = PyBytes_AS_STRING(bytes);
1528
Steve Dowerb82e17e2019-05-23 08:45:22 -07001529 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001530 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001531 return NULL;
1532 }
1533
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001534 do {
1535 Py_BEGIN_ALLOW_THREADS
1536 f = fopen(path_bytes, mode);
1537 Py_END_ALLOW_THREADS
1538 } while (f == NULL
1539 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001540
Victor Stinner4e314432010-10-07 21:45:39 +00001541 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001542#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001543 if (async_err)
1544 return NULL;
1545
Victor Stinnere42ccd22015-03-18 01:39:23 +01001546 if (f == NULL) {
1547 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001548 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001549 }
1550
1551 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001552 fclose(f);
1553 return NULL;
1554 }
1555 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001556}
1557
Victor Stinner66aab0c2015-03-19 22:53:20 +01001558/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001559
1560 On success, return the number of read bytes, it can be lower than count.
1561 If the current file offset is at or past the end of file, no bytes are read,
1562 and read() returns zero.
1563
1564 On error, raise an exception, set errno and return -1.
1565
1566 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1567 If the Python signal handler raises an exception, the function returns -1
1568 (the syscall is not retried).
1569
1570 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001571Py_ssize_t
1572_Py_read(int fd, void *buf, size_t count)
1573{
1574 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001575 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001576 int async_err = 0;
1577
Victor Stinner8a1be612016-03-14 22:07:55 +01001578 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001579
Victor Stinner66aab0c2015-03-19 22:53:20 +01001580 /* _Py_read() must not be called with an exception set, otherwise the
1581 * caller may think that read() was interrupted by a signal and the signal
1582 * handler raised an exception. */
1583 assert(!PyErr_Occurred());
1584
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001585 if (count > _PY_READ_MAX) {
1586 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001587 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001588
Steve Dower8fc89802015-04-12 00:26:27 -04001589 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001590 do {
1591 Py_BEGIN_ALLOW_THREADS
1592 errno = 0;
1593#ifdef MS_WINDOWS
1594 n = read(fd, buf, (int)count);
1595#else
1596 n = read(fd, buf, count);
1597#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001598 /* save/restore errno because PyErr_CheckSignals()
1599 * and PyErr_SetFromErrno() can modify it */
1600 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001601 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001602 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001603 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001604 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001605
1606 if (async_err) {
1607 /* read() was interrupted by a signal (failed with EINTR)
1608 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001609 errno = err;
1610 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001611 return -1;
1612 }
1613 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001614 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001615 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001616 return -1;
1617 }
1618
1619 return n;
1620}
1621
Victor Stinner82c3e452015-04-01 18:34:45 +02001622static Py_ssize_t
1623_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001624{
1625 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001626 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001627 int async_err = 0;
1628
Steve Dower8fc89802015-04-12 00:26:27 -04001629 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001630#ifdef MS_WINDOWS
1631 if (count > 32767 && isatty(fd)) {
1632 /* Issue #11395: the Windows console returns an error (12: not
1633 enough space error) on writing into stdout if stdout mode is
1634 binary and the length is greater than 66,000 bytes (or less,
1635 depending on heap usage). */
1636 count = 32767;
1637 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001638#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001639 if (count > _PY_WRITE_MAX) {
1640 count = _PY_WRITE_MAX;
1641 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001642
Victor Stinner82c3e452015-04-01 18:34:45 +02001643 if (gil_held) {
1644 do {
1645 Py_BEGIN_ALLOW_THREADS
1646 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001647#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001648 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001649#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001650 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001651#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001652 /* save/restore errno because PyErr_CheckSignals()
1653 * and PyErr_SetFromErrno() can modify it */
1654 err = errno;
1655 Py_END_ALLOW_THREADS
1656 } while (n < 0 && err == EINTR &&
1657 !(async_err = PyErr_CheckSignals()));
1658 }
1659 else {
1660 do {
1661 errno = 0;
1662#ifdef MS_WINDOWS
1663 n = write(fd, buf, (int)count);
1664#else
1665 n = write(fd, buf, count);
1666#endif
1667 err = errno;
1668 } while (n < 0 && err == EINTR);
1669 }
Steve Dower8fc89802015-04-12 00:26:27 -04001670 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001671
1672 if (async_err) {
1673 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001674 and the Python signal handler raised an exception (if gil_held is
1675 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001676 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001677 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001678 return -1;
1679 }
1680 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001681 if (gil_held)
1682 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001683 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001684 return -1;
1685 }
1686
1687 return n;
1688}
1689
Victor Stinner82c3e452015-04-01 18:34:45 +02001690/* Write count bytes of buf into fd.
1691
1692 On success, return the number of written bytes, it can be lower than count
1693 including 0. On error, raise an exception, set errno and return -1.
1694
1695 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1696 If the Python signal handler raises an exception, the function returns -1
1697 (the syscall is not retried).
1698
1699 Release the GIL to call write(). The caller must hold the GIL. */
1700Py_ssize_t
1701_Py_write(int fd, const void *buf, size_t count)
1702{
Victor Stinner8a1be612016-03-14 22:07:55 +01001703 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001704
Victor Stinner82c3e452015-04-01 18:34:45 +02001705 /* _Py_write() must not be called with an exception set, otherwise the
1706 * caller may think that write() was interrupted by a signal and the signal
1707 * handler raised an exception. */
1708 assert(!PyErr_Occurred());
1709
1710 return _Py_write_impl(fd, buf, count, 1);
1711}
1712
1713/* Write count bytes of buf into fd.
1714 *
1715 * On success, return the number of written bytes, it can be lower than count
1716 * including 0. On error, set errno and return -1.
1717 *
1718 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1719 * without calling the Python signal handler. */
1720Py_ssize_t
1721_Py_write_noraise(int fd, const void *buf, size_t count)
1722{
1723 return _Py_write_impl(fd, buf, count, 0);
1724}
1725
#ifdef HAVE_READLINK

/* Read the value of a symbolic link.

   'path' is encoded to the locale encoding before calling readlink(), and
   the link target is decoded from the locale encoding into 'buf', which has
   room for 'buflen' wide characters including the trailing NUL.

   Return the length of the decoded target (excluding the trailing NUL) on
   success. Return -1 on encoding error, on readlink() error, if the internal
   buffer is too short, on decoding error, or if 'buf' is too short. errno is
   left as set by readlink() when readlink() fails, and is set to EINVAL for
   the other failures. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
{
    char cbuf[MAXPATHLEN];
    const size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);

    char *cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        errno = EINVAL;
        return -1;
    }

    Py_ssize_t res = readlink(cpath, cbuf, cbuf_len);
    /* Releasing memory may modify errno on some platforms: remember the
       value set by readlink() so that the caller sees the real error. */
    int readlink_errno = errno;
    PyMem_RawFree(cpath);
    if (res == -1) {
        errno = readlink_errno;
        return -1;
    }
    if ((size_t)res == cbuf_len) {
        /* The target filled the whole buffer: it may have been truncated
           and there is no room left for the NUL terminator. */
        errno = EINVAL;
        return -1;
    }
    cbuf[res] = '\0'; /* readlink() does not null-terminate its output */

    size_t wlen;
    wchar_t *wbuf = Py_DecodeLocale(cbuf, &wlen);
    if (wbuf == NULL) {
        errno = EINVAL;
        return -1;
    }
    /* buf must have space to store the trailing NUL character */
    if (buflen <= wlen) {
        PyMem_RawFree(wbuf);
        errno = EINVAL;
        return -1;
    }
    wcsncpy(buf, wbuf, buflen);
    PyMem_RawFree(wbuf);
    return (int)wlen;
}
#endif
1774
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname.

   'path' is encoded to the locale encoding before calling realpath(), and
   the result is decoded from the locale encoding into 'resolved_path', which
   has room for 'resolved_path_len' wide characters including the trailing
   NUL.

   Return 'resolved_path' on success. Return NULL on encoding error,
   realpath() error, decoding error or if 'resolved_path' is too short.
   errno is left as set by realpath() when realpath() fails, and is set to
   EINVAL for the other failures. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char cresolved_path[MAXPATHLEN];

    char *cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        errno = EINVAL;
        return NULL;
    }

    char *res = realpath(cpath, cresolved_path);
    /* Releasing memory may modify errno on some platforms: remember the
       value set by realpath() so that the caller sees the real error. */
    int realpath_errno = errno;
    PyMem_RawFree(cpath);
    if (res == NULL) {
        errno = realpath_errno;
        return NULL;
    }

    size_t wlen;
    wchar_t *wresolved_path = Py_DecodeLocale(cresolved_path, &wlen);
    if (wresolved_path == NULL) {
        errno = EINVAL;
        return NULL;
    }
    /* resolved_path must have space to store the trailing NUL character */
    if (resolved_path_len <= wlen) {
        PyMem_RawFree(wresolved_path);
        errno = EINVAL;
        return NULL;
    }
    wcsncpy(resolved_path, wresolved_path, resolved_path_len);
    PyMem_RawFree(wresolved_path);
    return resolved_path;
}
#endif
1817
Victor Stinner3939c322019-06-25 15:02:43 +02001818
1819#ifndef MS_WINDOWS
1820int
1821_Py_isabs(const wchar_t *path)
1822{
1823 return (path[0] == SEP);
1824}
1825#endif
1826
1827
1828/* Get an absolute path.
1829 On error (ex: fail to get the current directory), return -1.
1830 On memory allocation failure, set *abspath_p to NULL and return 0.
1831 On success, return a newly allocated to *abspath_p to and return 0.
1832 The string must be freed by PyMem_RawFree(). */
1833int
1834_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1835{
1836#ifdef MS_WINDOWS
1837 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1838 DWORD result;
1839
1840 result = GetFullPathNameW(path,
1841 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1842 NULL);
1843 if (!result) {
1844 return -1;
1845 }
1846
1847 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1848 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1849 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1850 }
1851 else {
1852 woutbufp = NULL;
1853 }
1854 if (!woutbufp) {
1855 *abspath_p = NULL;
1856 return 0;
1857 }
1858
1859 result = GetFullPathNameW(path, result, woutbufp, NULL);
1860 if (!result) {
1861 PyMem_RawFree(woutbufp);
1862 return -1;
1863 }
1864 }
1865
1866 if (woutbufp != woutbuf) {
1867 *abspath_p = woutbufp;
1868 return 0;
1869 }
1870
1871 *abspath_p = _PyMem_RawWcsdup(woutbufp);
1872 return 0;
1873#else
1874 if (_Py_isabs(path)) {
1875 *abspath_p = _PyMem_RawWcsdup(path);
1876 return 0;
1877 }
1878
1879 wchar_t cwd[MAXPATHLEN + 1];
1880 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
1881 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
1882 /* unable to get the current directory */
1883 return -1;
1884 }
1885
1886 size_t cwd_len = wcslen(cwd);
1887 size_t path_len = wcslen(path);
1888 size_t len = cwd_len + 1 + path_len + 1;
1889 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1890 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
1891 }
1892 else {
1893 *abspath_p = NULL;
1894 }
1895 if (*abspath_p == NULL) {
1896 return 0;
1897 }
1898
1899 wchar_t *abspath = *abspath_p;
1900 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
1901 abspath += cwd_len;
1902
1903 *abspath = (wchar_t)SEP;
1904 abspath++;
1905
1906 memcpy(abspath, path, path_len * sizeof(wchar_t));
1907 abspath += path_len;
1908
1909 *abspath = 0;
1910 return 0;
1911#endif
1912}
1913
1914
Victor Stinnerfaddaed2019-03-19 02:58:14 +01001915/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001916 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001917
Victor Stinner1be0d112019-03-18 17:47:26 +01001918 Return NULL on getcwd() error, on decoding error, or if 'buf' is
1919 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001920wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01001921_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001922{
1923#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01001924 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1925 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00001926#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001927 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001928 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001929 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001930
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001931 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001932 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001933 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001934 if (wname == NULL)
1935 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01001936 /* wname must have space to store the trailing NUL character */
1937 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001938 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001939 return NULL;
1940 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001941 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001942 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001943 return buf;
1944#endif
1945}
1946
Victor Stinnerdaf45552013-08-28 00:53:59 +02001947/* Duplicate a file descriptor. The new file descriptor is created as
1948 non-inheritable. Return a new file descriptor on success, raise an OSError
1949 exception and return -1 on error.
1950
1951 The GIL is released to call dup(). The caller must hold the GIL. */
1952int
1953_Py_dup(int fd)
1954{
1955#ifdef MS_WINDOWS
1956 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001957#endif
1958
Victor Stinner8a1be612016-03-14 22:07:55 +01001959 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001960
Victor Stinnerdaf45552013-08-28 00:53:59 +02001961#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001962 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001963 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001964 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001965 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001966 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001967 return -1;
1968 }
1969
Victor Stinnerdaf45552013-08-28 00:53:59 +02001970 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001971 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001972 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001973 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001974 Py_END_ALLOW_THREADS
1975 if (fd < 0) {
1976 PyErr_SetFromErrno(PyExc_OSError);
1977 return -1;
1978 }
1979
Zackery Spytz28fca0c2019-06-17 01:17:14 -06001980 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1981 _Py_BEGIN_SUPPRESS_IPH
1982 close(fd);
1983 _Py_END_SUPPRESS_IPH
1984 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001985 }
1986#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1987 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001988 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001989 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001990 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001991 Py_END_ALLOW_THREADS
1992 if (fd < 0) {
1993 PyErr_SetFromErrno(PyExc_OSError);
1994 return -1;
1995 }
1996
1997#else
1998 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001999 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002000 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002001 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002002 Py_END_ALLOW_THREADS
2003 if (fd < 0) {
2004 PyErr_SetFromErrno(PyExc_OSError);
2005 return -1;
2006 }
2007
2008 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002009 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002010 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002011 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002012 return -1;
2013 }
2014#endif
2015 return fd;
2016}
2017
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002018#ifndef MS_WINDOWS
2019/* Get the blocking mode of the file descriptor.
2020 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2021 raise an exception and return -1 on error. */
2022int
2023_Py_get_blocking(int fd)
2024{
Steve Dower8fc89802015-04-12 00:26:27 -04002025 int flags;
2026 _Py_BEGIN_SUPPRESS_IPH
2027 flags = fcntl(fd, F_GETFL, 0);
2028 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002029 if (flags < 0) {
2030 PyErr_SetFromErrno(PyExc_OSError);
2031 return -1;
2032 }
2033
2034 return !(flags & O_NONBLOCK);
2035}
2036
2037/* Set the blocking mode of the specified file descriptor.
2038
2039 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2040 otherwise.
2041
2042 Return 0 on success, raise an exception and return -1 on error. */
2043int
2044_Py_set_blocking(int fd, int blocking)
2045{
pxinwr06afac62020-12-08 04:41:12 +08002046/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2047 Use fcntl() instead. */
2048#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002049 int arg = !blocking;
2050 if (ioctl(fd, FIONBIO, &arg) < 0)
2051 goto error;
2052#else
2053 int flags, res;
2054
Steve Dower8fc89802015-04-12 00:26:27 -04002055 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002056 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002057 if (flags >= 0) {
2058 if (blocking)
2059 flags = flags & (~O_NONBLOCK);
2060 else
2061 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002062
Steve Dower8fc89802015-04-12 00:26:27 -04002063 res = fcntl(fd, F_SETFL, flags);
2064 } else {
2065 res = -1;
2066 }
2067 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002068
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002069 if (res < 0)
2070 goto error;
2071#endif
2072 return 0;
2073
2074error:
2075 PyErr_SetFromErrno(PyExc_OSError);
2076 return -1;
2077}
2078#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002079
2080
2081int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002082_Py_GetLocaleconvNumeric(struct lconv *lc,
2083 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002084{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002085 assert(decimal_point != NULL);
2086 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002087
TIGirardif2312032020-10-20 08:39:52 -03002088#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002089 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002090 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002091 change_locale = 1;
2092 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002093 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002094 change_locale = 1;
2095 }
2096
2097 /* Keep a copy of the LC_CTYPE locale */
2098 char *oldloc = NULL, *loc = NULL;
2099 if (change_locale) {
2100 oldloc = setlocale(LC_CTYPE, NULL);
2101 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002102 PyErr_SetString(PyExc_RuntimeWarning,
2103 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002104 return -1;
2105 }
2106
2107 oldloc = _PyMem_Strdup(oldloc);
2108 if (!oldloc) {
2109 PyErr_NoMemory();
2110 return -1;
2111 }
2112
2113 loc = setlocale(LC_NUMERIC, NULL);
2114 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2115 loc = NULL;
2116 }
2117
2118 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002119 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002120 if LC_NUMERIC locale is different than LC_CTYPE locale and
2121 decimal_point and/or thousands_sep are non-ASCII or longer than
2122 1 byte */
2123 setlocale(LC_CTYPE, loc);
2124 }
2125 }
2126
TIGirardif2312032020-10-20 08:39:52 -03002127#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2128#else /* MS_WINDOWS */
2129/* Use _W_* fields of Windows strcut lconv */
2130#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2131#endif /* MS_WINDOWS */
2132
Victor Stinner02e6bf72018-11-20 16:20:16 +01002133 int res = -1;
2134
TIGirardif2312032020-10-20 08:39:52 -03002135 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002136 if (*decimal_point == NULL) {
2137 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002138 }
2139
TIGirardif2312032020-10-20 08:39:52 -03002140 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002141 if (*thousands_sep == NULL) {
2142 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002143 }
2144
2145 res = 0;
2146
Victor Stinner02e6bf72018-11-20 16:20:16 +01002147done:
TIGirardif2312032020-10-20 08:39:52 -03002148#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002149 if (loc != NULL) {
2150 setlocale(LC_CTYPE, oldloc);
2151 }
2152 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002153#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002154 return res;
TIGirardif2312032020-10-20 08:39:52 -03002155
2156#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002157}
Kyle Evans79925792020-10-13 15:04:44 -05002158
/* How _Py_closerange() picks the function used to close a range of fds:
 * 1. If close_range(2) is available, always prefer that; it's better for
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
 *    the entire fd space and simply doing nothing for those outside the range.
 * 2. If closefrom(2) is available, attempt to use that next when closing up
 *    to sysconf(_SC_OPEN_MAX).
 * 2a. Fall back to fdwalk(3) when not closing up to sysconf(_SC_OPEN_MAX),
 *    as that will be more performant if the range happens to have any chunk
 *    of non-opened fd in the middle.
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
 */
#ifdef __FreeBSD__
#  define USE_CLOSEFROM
#endif /* __FreeBSD__ */

#ifdef HAVE_FDWALK
#  define USE_FDWALK
#endif /* HAVE_FDWALK */

#ifdef USE_FDWALK
/* fdwalk() callback: 'lohi' points to two ints {lo, hi} describing the
   half-open range [lo, hi). Close 'fd' if it is inside the range and
   return 0. Return 1 without closing anything once 'fd' is at or above
   'hi'. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *range = (const int *)lohi;
    const int lo = range[0];
    const int hi = range[1];

    if (fd >= hi) {
        return 1;
    }
    if (fd >= lo) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
#endif /* USE_FDWALK */
2195
2196/* Closes all file descriptors in [first, last], ignoring errors. */
2197void
2198_Py_closerange(int first, int last)
2199{
2200 first = Py_MAX(first, 0);
2201 _Py_BEGIN_SUPPRESS_IPH
2202#ifdef HAVE_CLOSE_RANGE
2203 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2204 /* Any errors encountered while closing file descriptors are ignored;
2205 * ENOSYS means no kernel support, though,
2206 * so we'll fallback to the other methods. */
2207 }
2208 else
2209#endif /* HAVE_CLOSE_RANGE */
2210#ifdef USE_CLOSEFROM
2211 if (last >= sysconf(_SC_OPEN_MAX)) {
2212 /* Any errors encountered while closing file descriptors are ignored */
2213 closefrom(first);
2214 }
2215 else
2216#endif /* USE_CLOSEFROM */
2217#ifdef USE_FDWALK
2218 {
2219 int lohi[2];
2220 lohi[0] = first;
2221 lohi[1] = last + 1;
2222 fdwalk(_fdwalk_close_func, lohi);
2223 }
2224#else
2225 {
2226 for (int i = first; i <= last; i++) {
2227 /* Ignore errors */
2228 (void)close(i);
2229 }
2230 }
2231#endif /* USE_FDWALK */
2232 _Py_END_SUPPRESS_IPH
2233}