blob: f2b4681ea849c53a12fd9746b36a23ec080f94e8 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner710e8262020-10-31 01:02:09 +01002#include "pycore_fileutils.h" // fileutils definitions
3#include "pycore_runtime.h" // _PyRuntime
Victor Stinner361dcdc2020-04-15 03:24:57 +02004#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01005#include <locale.h>
6
Victor Stinnerb306d752010-10-07 22:09:40 +00007#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08008# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00009# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -040010extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000011#endif
Victor Stinner4e314432010-10-07 21:45:39 +000012
Brett Cannonefb00c02012-02-29 18:31:31 -050013#ifdef HAVE_LANGINFO_H
14#include <langinfo.h>
15#endif
16
Victor Stinnerdaf45552013-08-28 00:53:59 +020017#ifdef HAVE_SYS_IOCTL_H
18#include <sys/ioctl.h>
19#endif
20
21#ifdef HAVE_FCNTL_H
22#include <fcntl.h>
23#endif /* HAVE_FCNTL_H */
24
Victor Stinnerdaf45552013-08-28 00:53:59 +020025#ifdef O_CLOEXEC
Victor Stinnerb034eee2013-09-07 10:36:04 +020026/* Does open() support the O_CLOEXEC flag? Possible values:
Victor Stinnerdaf45552013-08-28 00:53:59 +020027
28 -1: unknown
29 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
30 1: open() supports O_CLOEXEC flag, close-on-exec is set
31
Victor Stinnera555cfc2015-03-18 00:22:14 +010032 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
33 and os.open(). */
Victor Stinnerdaf45552013-08-28 00:53:59 +020034int _Py_open_cloexec_works = -1;
35#endif
36
Victor Stinner3d4226a2018-08-29 22:21:32 +020037
38static int
39get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
40{
41 switch (errors)
42 {
43 case _Py_ERROR_STRICT:
44 *surrogateescape = 0;
45 return 0;
46 case _Py_ERROR_SURROGATEESCAPE:
47 *surrogateescape = 1;
48 return 0;
49 default:
50 return -1;
51 }
52}
53
54
Brett Cannonefb00c02012-02-29 18:31:31 -050055PyObject *
56_Py_device_encoding(int fd)
57{
Steve Dower8fc89802015-04-12 00:26:27 -040058 int valid;
59 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070060 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040061 _Py_END_SUPPRESS_IPH
62 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050063 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040064
Victor Stinner14b9b112013-06-25 00:37:25 +020065#if defined(MS_WINDOWS)
Victor Stinner35297182020-11-04 11:20:10 +010066 UINT cp;
Brett Cannonefb00c02012-02-29 18:31:31 -050067 if (fd == 0)
68 cp = GetConsoleCP();
69 else if (fd == 1 || fd == 2)
70 cp = GetConsoleOutputCP();
71 else
72 cp = 0;
73 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
74 has no console */
Victor Stinner35297182020-11-04 11:20:10 +010075 if (cp == 0) {
76 Py_RETURN_NONE;
Brett Cannonefb00c02012-02-29 18:31:31 -050077 }
Victor Stinner35297182020-11-04 11:20:10 +010078
79 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
80#else
81 return _Py_GetLocaleEncodingObject();
Brett Cannonefb00c02012-02-29 18:31:31 -050082#endif
Brett Cannonefb00c02012-02-29 18:31:31 -050083}
84
Victor Stinnere2510952019-05-02 11:28:57 -040085#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +010086
87#define USE_FORCE_ASCII
88
Victor Stinnerd45c7f82012-12-04 01:34:47 +010089extern int _Py_normalize_encoding(const char *, char *, size_t);
90
Victor Stinnerd500e532018-08-28 17:27:36 +020091/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
92 and POSIX locale. nl_langinfo(CODESET) announces an alias of the
Victor Stinnerd45c7f82012-12-04 01:34:47 +010093 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
94 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
95 locale.getpreferredencoding() codec. For example, if command line arguments
96 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
97 UnicodeEncodeError instead of retrieving the original byte string.
98
99 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
100 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
101 one byte in range 0x80-0xff can be decoded from the locale encoding. The
102 workaround is also enabled on error, for example if getting the locale
103 failed.
104
Victor Stinnerd500e532018-08-28 17:27:36 +0200105 On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
106 announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
107 ASCII encoding in this case.
108
Philip Jenvey215c49a2013-01-15 13:24:12 -0800109 Values of force_ascii:
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100110
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200111 1: the workaround is used: Py_EncodeLocale() uses
112 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100113 decode_ascii()
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200114 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
115 Py_DecodeLocale() uses mbstowcs()
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100116 -1: unknown, need to call check_force_ascii() to get the value
117*/
118static int force_ascii = -1;
119
/* Decide whether the "force ASCII" workaround must be enabled for the
   current LC_CTYPE locale.

   Return 0 (workaround not needed) if:

   - the LC_CTYPE locale is neither "C" nor "POSIX";
   - nl_langinfo(CODESET) is not "ascii" or one of its aliases (on HP-UX:
     the "roman8" probe below did not detect Latin1 behaviour);
   - none of the bytes 0x80..0xff can be decoded by mbstowcs(), i.e. the
     locale encoding really is ASCII.

   Return 1 (force ASCII) if:

   - a non-ASCII byte can be decoded by mbstowcs() although the locale
     announces ASCII (on HP-UX: "roman8" is announced but 0xA7 decodes
     like Latin1);
   - nl_langinfo(CODESET) is not available at build time;
   - an error occurred (setlocale() failed, CODESET is empty, or the
     encoding name could not be normalized). */
static int
check_force_ascii(void)
{
    const char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* The probe is NUL-terminated: mbstowcs() expects a multibyte
           *string* and may examine the bytes following a lead byte, so a
           bare 1-byte buffer could be read out of bounds. */
        const char probe[2] = {(char)(unsigned char)0xA7, '\0'};
        wchar_t wch;
        size_t res = mbstowcs(&wch, probe, 1);
        if (res != (size_t)-1 && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* NUL-terminated probe: mbstowcs() may look at the byte following
           a lead byte, which must therefore belong to the buffer. */
        char probe[2];
        wchar_t wch[1];

        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        size_t res = mbstowcs(wch, probe, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
219
Victor Stinnerd500e532018-08-28 17:27:36 +0200220
221int
222_Py_GetForceASCII(void)
223{
224 if (force_ascii == -1) {
225 force_ascii = check_force_ascii();
226 }
227 return force_ascii;
228}
229
230
Victor Stinner353933e2018-11-23 13:08:26 +0100231void
232_Py_ResetForceASCII(void)
233{
234 force_ascii = -1;
235}
236
237
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100238static int
239encode_ascii(const wchar_t *text, char **str,
240 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200241 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100242{
243 char *result = NULL, *out;
244 size_t len, i;
245 wchar_t ch;
246
Victor Stinner3d4226a2018-08-29 22:21:32 +0200247 int surrogateescape;
248 if (get_surrogateescape(errors, &surrogateescape) < 0) {
249 return -3;
250 }
251
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100252 len = wcslen(text);
253
Victor Stinner9bee3292017-12-21 16:49:13 +0100254 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100255 if (raw_malloc) {
256 result = PyMem_RawMalloc(len + 1);
257 }
258 else {
259 result = PyMem_Malloc(len + 1);
260 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100261 if (result == NULL) {
262 return -1;
263 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100264
265 out = result;
266 for (i=0; i<len; i++) {
267 ch = text[i];
268
269 if (ch <= 0x7f) {
270 /* ASCII character */
271 *out++ = (char)ch;
272 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100273 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100274 /* UTF-8b surrogate */
275 *out++ = (char)(ch - 0xdc00);
276 }
277 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100278 if (raw_malloc) {
279 PyMem_RawFree(result);
280 }
281 else {
282 PyMem_Free(result);
283 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100284 if (error_pos != NULL) {
285 *error_pos = i;
286 }
287 if (reason) {
288 *reason = "encoding error";
289 }
290 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100291 }
292 }
293 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100294 *str = result;
295 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100296}
Victor Stinnerd500e532018-08-28 17:27:36 +0200297#else
/* Variant used when the "force ASCII" workaround is compiled out (UTF-8
   forced as the filesystem encoding, or Windows): the workaround is never
   active, so always return 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100303
/* Variant used when the "force ASCII" workaround is compiled out: there is
   no cached state in this configuration, so this is a no-op. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
Victor Stinnere2510952019-05-02 11:28:57 -0400309#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100310
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100311
312#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
313static int
314decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200315 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100316{
317 wchar_t *res;
318 unsigned char *in;
319 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600320 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100321
Victor Stinner3d4226a2018-08-29 22:21:32 +0200322 int surrogateescape;
323 if (get_surrogateescape(errors, &surrogateescape) < 0) {
324 return -3;
325 }
326
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100327 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
328 return -1;
329 }
330 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
331 if (!res) {
332 return -1;
333 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100334
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100335 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100336 for (in = (unsigned char*)arg; *in; in++) {
337 unsigned char ch = *in;
338 if (ch < 128) {
339 *out++ = ch;
340 }
341 else {
342 if (!surrogateescape) {
343 PyMem_RawFree(res);
344 if (wlen) {
345 *wlen = in - (unsigned char*)arg;
346 }
347 if (reason) {
348 *reason = "decoding error";
349 }
350 return -2;
351 }
352 *out++ = 0xdc00 + ch;
353 }
354 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100355 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100356
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100357 if (wlen != NULL) {
358 *wlen = out - res;
359 }
360 *wstr = res;
361 return 0;
362}
363#endif /* !HAVE_MBRTOWC */
364
365static int
366decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200367 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000368{
369 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100370 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000371 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200372#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000373 unsigned char *in;
374 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000375 mbstate_t mbs;
376#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100377
Victor Stinner3d4226a2018-08-29 22:21:32 +0200378 int surrogateescape;
379 if (get_surrogateescape(errors, &surrogateescape) < 0) {
380 return -3;
381 }
382
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100383#ifdef HAVE_BROKEN_MBSTOWCS
384 /* Some platforms have a broken implementation of
385 * mbstowcs which does not count the characters that
386 * would result from conversion. Use an upper bound.
387 */
388 argsize = strlen(arg);
389#else
390 argsize = mbstowcs(NULL, arg, 0);
391#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000392 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100393 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
394 return -1;
395 }
396 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
397 if (!res) {
398 return -1;
399 }
400
401 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000402 if (count != (size_t)-1) {
403 wchar_t *tmp;
404 /* Only use the result if it contains no
405 surrogate characters. */
406 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100407 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000408 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000409 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100410 if (wlen != NULL) {
411 *wlen = count;
412 }
413 *wstr = res;
414 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000415 }
Victor Stinner4e314432010-10-07 21:45:39 +0000416 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200417 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000418 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100419
Victor Stinner4e314432010-10-07 21:45:39 +0000420 /* Conversion failed. Fall back to escaping with surrogateescape. */
421#ifdef HAVE_MBRTOWC
422 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
423
424 /* Overallocate; as multi-byte characters are in the argument, the
425 actual output could use less memory. */
426 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100427 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
428 return -1;
429 }
430 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
431 if (!res) {
432 return -1;
433 }
434
Victor Stinner4e314432010-10-07 21:45:39 +0000435 in = (unsigned char*)arg;
436 out = res;
437 memset(&mbs, 0, sizeof mbs);
438 while (argsize) {
439 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100440 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000441 /* Reached end of string; null char stored. */
442 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100443 }
444
Victor Stinner4e314432010-10-07 21:45:39 +0000445 if (converted == (size_t)-2) {
446 /* Incomplete character. This should never happen,
447 since we provide everything that we have -
448 unless there is a bug in the C library, or I
449 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100450 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000451 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100452
Victor Stinner4e314432010-10-07 21:45:39 +0000453 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100454 if (!surrogateescape) {
455 goto decode_error;
456 }
457
Victor Stinner4e314432010-10-07 21:45:39 +0000458 /* Conversion error. Escape as UTF-8b, and start over
459 in the initial shift state. */
460 *out++ = 0xdc00 + *in++;
461 argsize--;
462 memset(&mbs, 0, sizeof mbs);
463 continue;
464 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100465
Victor Stinner76df43d2012-10-30 01:42:39 +0100466 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100467 if (!surrogateescape) {
468 goto decode_error;
469 }
470
Victor Stinner4e314432010-10-07 21:45:39 +0000471 /* Surrogate character. Escape the original
472 byte sequence with surrogateescape. */
473 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100474 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000475 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100476 }
Victor Stinner4e314432010-10-07 21:45:39 +0000477 continue;
478 }
479 /* successfully converted some bytes */
480 in += converted;
481 argsize -= converted;
482 out++;
483 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100484 if (wlen != NULL) {
485 *wlen = out - res;
486 }
487 *wstr = res;
488 return 0;
489
490decode_error:
491 PyMem_RawFree(res);
492 if (wlen) {
493 *wlen = in - (unsigned char*)arg;
494 }
495 if (reason) {
496 *reason = "decoding error";
497 }
498 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100499#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000500 /* Cannot use C locale for escaping; manually escape as if charset
501 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
502 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200503 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100504#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100505}
506
507
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100508/* Decode a byte string from the locale encoding.
509
510 Use the strict error handler if 'surrogateescape' is zero. Use the
511 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
512 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
513 can be decoded as a surrogate character, escape the bytes using the
514 surrogateescape error handler instead of decoding them.
515
Ville Skyttä61f82e02018-04-20 23:08:45 +0300516 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100517 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
518 the number of wide characters excluding the null character into *wlen.
519
520 On memory allocation failure, return -1.
521
522 On decoding error, return -2. If wlen is not NULL, write the start of
523 invalid byte sequence in the input string into *wlen. If reason is not NULL,
524 write the decoding error message into *reason.
525
Victor Stinner3d4226a2018-08-29 22:21:32 +0200526 Return -3 if the error handler 'errors' is not supported.
527
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100528 Use the Py_EncodeLocaleEx() function to encode the character string back to
529 a byte string. */
530int
531_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
532 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200533 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100534{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100535 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400536#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100537 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200538 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100539#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200540 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100541#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100542 }
543
Victor Stinnere2510952019-05-02 11:28:57 -0400544#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100545 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200546 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100547#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200548 int use_utf8 = (Py_UTF8Mode == 1);
549#ifdef MS_WINDOWS
550 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
551#endif
552 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200553 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
554 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100555 }
556
557#ifdef USE_FORCE_ASCII
558 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100559 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100560 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100561
562 if (force_ascii) {
563 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200564 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100565 }
566#endif
567
Victor Stinner3d4226a2018-08-29 22:21:32 +0200568 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400569#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100570}
571
572
Victor Stinner91106cd2017-12-13 12:29:09 +0100573/* Decode a byte string from the locale encoding with the
574 surrogateescape error handler: undecodable bytes are decoded as characters
575 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
576 character, escape the bytes using the surrogateescape error handler instead
577 of decoding them.
578
579 Return a pointer to a newly allocated wide character string, use
580 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
581 wide characters excluding the null character into *size
582
583 Return NULL on decoding error or memory allocation error. If *size* is not
584 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
585 decoding error.
586
587 Decoding errors should never happen, unless there is a bug in the C
588 library.
589
590 Use the Py_EncodeLocale() function to encode the character string back to a
591 byte string. */
592wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100593Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100594{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100595 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200596 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
597 NULL, 0,
598 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100599 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200600 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100601 if (wlen != NULL) {
602 *wlen = (size_t)res;
603 }
604 return NULL;
605 }
606 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100607}
Victor Stinner91106cd2017-12-13 12:29:09 +0100608
Victor Stinner91106cd2017-12-13 12:29:09 +0100609
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100610static int
611encode_current_locale(const wchar_t *text, char **str,
612 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200613 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100614{
Victor Stinner4e314432010-10-07 21:45:39 +0000615 const size_t len = wcslen(text);
616 char *result = NULL, *bytes = NULL;
617 size_t i, size, converted;
618 wchar_t c, buf[2];
619
Victor Stinner3d4226a2018-08-29 22:21:32 +0200620 int surrogateescape;
621 if (get_surrogateescape(errors, &surrogateescape) < 0) {
622 return -3;
623 }
624
Victor Stinner4e314432010-10-07 21:45:39 +0000625 /* The function works in two steps:
626 1. compute the length of the output buffer in bytes (size)
627 2. outputs the bytes */
628 size = 0;
629 buf[1] = 0;
630 while (1) {
631 for (i=0; i < len; i++) {
632 c = text[i];
633 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100634 if (!surrogateescape) {
635 goto encode_error;
636 }
Victor Stinner4e314432010-10-07 21:45:39 +0000637 /* UTF-8b surrogate */
638 if (bytes != NULL) {
639 *bytes++ = c - 0xdc00;
640 size--;
641 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100642 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000643 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100644 }
Victor Stinner4e314432010-10-07 21:45:39 +0000645 continue;
646 }
647 else {
648 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100649 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000650 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100651 }
652 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000653 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100654 }
Victor Stinner4e314432010-10-07 21:45:39 +0000655 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100656 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000657 }
658 if (bytes != NULL) {
659 bytes += converted;
660 size -= converted;
661 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100662 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000663 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100664 }
Victor Stinner4e314432010-10-07 21:45:39 +0000665 }
666 }
667 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100668 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000669 break;
670 }
671
672 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100673 if (raw_malloc) {
674 result = PyMem_RawMalloc(size);
675 }
676 else {
677 result = PyMem_Malloc(size);
678 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100679 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100680 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100681 }
Victor Stinner4e314432010-10-07 21:45:39 +0000682 bytes = result;
683 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100684 *str = result;
685 return 0;
686
687encode_error:
688 if (raw_malloc) {
689 PyMem_RawFree(result);
690 }
691 else {
692 PyMem_Free(result);
693 }
694 if (error_pos != NULL) {
695 *error_pos = i;
696 }
697 if (reason) {
698 *reason = "encoding error";
699 }
700 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100701}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100702
Victor Stinner3d4226a2018-08-29 22:21:32 +0200703
704/* Encode a string to the locale encoding.
705
706 Parameters:
707
708 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
709 of PyMem_Malloc().
710 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
711 Python filesystem encoding.
712 * errors: error handler like "strict" or "surrogateescape".
713
714 Return value:
715
716 0: success, *str is set to a newly allocated decoded string.
717 -1: memory allocation failure
718 -2: encoding error, set *error_pos and *reason (if set).
719 -3: the error handler 'errors' is not supported.
720 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100721static int
722encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
723 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200724 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100725{
726 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400727#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100728 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200729 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100730#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100731 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200732 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100733#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100734 }
735
Victor Stinnere2510952019-05-02 11:28:57 -0400736#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100737 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200738 raw_malloc, errors);
739#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200740 int use_utf8 = (Py_UTF8Mode == 1);
741#ifdef MS_WINDOWS
742 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
743#endif
744 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100745 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200746 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100747 }
748
749#ifdef USE_FORCE_ASCII
750 if (force_ascii == -1) {
751 force_ascii = check_force_ascii();
752 }
753
754 if (force_ascii) {
755 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200756 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100757 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100758#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100759
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100760 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200761 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400762#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100763}
764
Victor Stinner9dd76202017-12-21 16:20:32 +0100765static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100766encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100767 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100768{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100769 char *str;
770 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200771 raw_malloc, current_locale,
772 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100773 if (res != -2 && error_pos) {
774 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100775 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100776 if (res != 0) {
777 return NULL;
778 }
779 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100780}
781
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, and to (size_t)-1 otherwise (on success or
   memory allocation error).

   Use the Py_DecodeLocale() function to decode the bytes string back to a
   wide character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* result is released with PyMem_Free() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100799
Victor Stinner91106cd2017-12-13 12:29:09 +0100800
/* Same as Py_EncodeLocale(), except that the result must be released with
   PyMem_RawFree() instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
808
809
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100810int
811_Py_EncodeLocaleEx(const wchar_t *text, char **str,
812 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200813 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100814{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100815 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200816 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000817}
818
Victor Stinner6672d0c2010-10-07 22:53:43 +0000819
Victor Stinner82458b62020-11-01 20:59:35 +0100820// Get the current locale encoding name:
821//
822// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
823// - Return "UTF-8" if the UTF-8 Mode is enabled
824// - On Windows, return the ANSI code page (ex: "cp1250")
Victor Stinnere662c392020-11-01 23:07:23 +0100825// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
Victor Stinner82458b62020-11-01 20:59:35 +0100826// - Otherwise, return nl_langinfo(CODESET).
827//
Victor Stinnere662c392020-11-01 23:07:23 +0100828// Return NULL on memory allocation failure.
Victor Stinner82458b62020-11-01 20:59:35 +0100829//
Victor Stinner710e8262020-10-31 01:02:09 +0100830// See also config_get_locale_encoding()
Victor Stinner82458b62020-11-01 20:59:35 +0100831wchar_t*
Victor Stinnere662c392020-11-01 23:07:23 +0100832_Py_GetLocaleEncoding(void)
Victor Stinner710e8262020-10-31 01:02:09 +0100833{
834#ifdef _Py_FORCE_UTF8_LOCALE
835 // On Android langinfo.h and CODESET are missing,
836 // and UTF-8 is always used in mbstowcs() and wcstombs().
Victor Stinner82458b62020-11-01 20:59:35 +0100837 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100838#else
839 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
840 if (preconfig->utf8_mode) {
Victor Stinner82458b62020-11-01 20:59:35 +0100841 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100842 }
843
Victor Stinner82458b62020-11-01 20:59:35 +0100844#ifdef MS_WINDOWS
845 wchar_t encoding[23];
846 unsigned int ansi_codepage = GetACP();
847 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
848 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
849 return _PyMem_RawWcsdup(encoding);
Victor Stinner710e8262020-10-31 01:02:09 +0100850#else
851 const char *encoding = nl_langinfo(CODESET);
852 if (!encoding || encoding[0] == '\0') {
Victor Stinnere662c392020-11-01 23:07:23 +0100853 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
854 // macOS if the LC_CTYPE locale is not supported.
Victor Stinner82458b62020-11-01 20:59:35 +0100855 return _PyMem_RawWcsdup(L"UTF-8");
Victor Stinner710e8262020-10-31 01:02:09 +0100856 }
Victor Stinner710e8262020-10-31 01:02:09 +0100857
Victor Stinner82458b62020-11-01 20:59:35 +0100858 wchar_t *wstr;
859 int res = decode_current_locale(encoding, &wstr, NULL,
Victor Stinnere662c392020-11-01 23:07:23 +0100860 NULL, _Py_ERROR_SURROGATEESCAPE);
Victor Stinner82458b62020-11-01 20:59:35 +0100861 if (res < 0) {
862 return NULL;
863 }
864 return wstr;
865#endif // !MS_WINDOWS
866
867#endif // !_Py_FORCE_UTF8_LOCALE
868}
869
870
871PyObject *
872_Py_GetLocaleEncodingObject(void)
873{
Victor Stinnere662c392020-11-01 23:07:23 +0100874 wchar_t *encoding = _Py_GetLocaleEncoding();
Victor Stinner82458b62020-11-01 20:59:35 +0100875 if (encoding == NULL) {
Victor Stinnere662c392020-11-01 23:07:23 +0100876 PyErr_NoMemory();
Victor Stinner82458b62020-11-01 20:59:35 +0100877 return NULL;
878 }
879
880 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
881 PyMem_RawFree(encoding);
882 return str;
Victor Stinner710e8262020-10-31 01:02:09 +0100883}
884
885
Steve Dowerf2f373f2015-02-21 08:44:05 -0800886#ifdef MS_WINDOWS
887static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
888
889static void
890FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
891{
892 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
893 /* Cannot simply cast and dereference in_ptr,
894 since it might not be aligned properly */
895 __int64 in;
896 memcpy(&in, in_ptr, sizeof(in));
897 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
898 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
899}
900
901void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800902_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800903{
904 /* XXX endianness */
905 __int64 out;
906 out = time_in + secs_between_epochs;
907 out = out * 10000000 + nsec_in / 100;
908 memcpy(out_ptr, &out, sizeof(out));
909}
910
911/* Below, we *know* that ugo+r is 0444 */
912#if _S_IREAD != 0400
913#error Unsupported C library
914#endif
915static int
916attributes_to_mode(DWORD attr)
917{
918 int m = 0;
919 if (attr & FILE_ATTRIBUTE_DIRECTORY)
920 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
921 else
922 m |= _S_IFREG;
923 if (attr & FILE_ATTRIBUTE_READONLY)
924 m |= 0444;
925 else
926 m |= 0666;
927 return m;
928}
929
Steve Dowerbf1f3762015-02-21 15:26:02 -0800930void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200931_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
932 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800933{
934 memset(result, 0, sizeof(*result));
935 result->st_mode = attributes_to_mode(info->dwFileAttributes);
936 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
937 result->st_dev = info->dwVolumeSerialNumber;
938 result->st_rdev = result->st_dev;
939 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
940 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
941 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
942 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100943 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -0700944 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
945 open other name surrogate reparse points without traversing them. To
946 detect/handle these, check st_file_attributes and st_reparse_tag. */
947 result->st_reparse_tag = reparse_tag;
948 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
949 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -0800950 /* first clear the S_IFMT bits */
951 result->st_mode ^= (result->st_mode & S_IFMT);
952 /* now set the bits that make this a symlink */
953 result->st_mode |= S_IFLNK;
954 }
955 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800956}
957#endif
958
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifndef MS_WINDOWS
    return fstat(fd, status);
#else
    HANDLE h;

    _Py_BEGIN_SUPPRESS_IPH
    h = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Anything that is not a disk file only gets its type bits filled in
       (and only for character devices and pipes). */
    switch (type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001024
Victor Stinnere134a7f2015-03-30 10:09:31 +02001025/* Return information about a file.
1026
1027 On POSIX, use fstat().
1028
1029 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -08001030 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1031 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +02001032 #23152.
1033
1034 Raise an exception and return -1 on error. On Windows, set the last Windows
1035 error on error. On POSIX, set errno on error. Fill status and return 0 on
1036 success.
1037
Victor Stinner6f4fae82015-04-01 18:34:32 +02001038 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1039 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +02001040int
1041_Py_fstat(int fd, struct _Py_stat_struct *status)
1042{
1043 int res;
1044
Victor Stinner8a1be612016-03-14 22:07:55 +01001045 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001046
Victor Stinnere134a7f2015-03-30 10:09:31 +02001047 Py_BEGIN_ALLOW_THREADS
1048 res = _Py_fstat_noraise(fd, status);
1049 Py_END_ALLOW_THREADS
1050
1051 if (res != 0) {
1052#ifdef MS_WINDOWS
1053 PyErr_SetFromWindowsErr(0);
1054#else
1055 PyErr_SetFromErrno(PyExc_OSError);
1056#endif
1057 return -1;
1058 }
1059 return 0;
1060}
Steve Dowerf2f373f2015-02-21 08:44:05 -08001061
Victor Stinner6672d0c2010-10-07 22:53:43 +00001062/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1063 call stat() otherwise. Only fill st_mode attribute on Windows.
1064
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001065 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1066 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001067
1068int
Victor Stinnera4a75952010-10-07 22:23:10 +00001069_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001070{
1071#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001072 int err;
1073 struct _stat wstatbuf;
1074
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001075#if USE_UNICODE_WCHAR_CACHE
1076 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1077#else /* USE_UNICODE_WCHAR_CACHE */
1078 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1079#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001080 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001081 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001082
Victor Stinneree587ea2011-11-17 00:51:38 +01001083 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001084 if (!err)
1085 statbuf->st_mode = wstatbuf.st_mode;
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001086#if !USE_UNICODE_WCHAR_CACHE
1087 PyMem_Free(wpath);
1088#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001089 return err;
1090#else
1091 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001092 PyObject *bytes;
1093 char *cpath;
1094
1095 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001096 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001097 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001098
1099 /* check for embedded null bytes */
1100 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1101 Py_DECREF(bytes);
1102 return -2;
1103 }
1104
1105 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001106 Py_DECREF(bytes);
1107 return ret;
1108#endif
1109}
1110
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001111
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001112/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001113static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001114get_inheritable(int fd, int raise)
1115{
1116#ifdef MS_WINDOWS
1117 HANDLE handle;
1118 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001119
Steve Dower8fc89802015-04-12 00:26:27 -04001120 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001121 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001122 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001123 if (handle == INVALID_HANDLE_VALUE) {
1124 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001125 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001126 return -1;
1127 }
1128
1129 if (!GetHandleInformation(handle, &flags)) {
1130 if (raise)
1131 PyErr_SetFromWindowsErr(0);
1132 return -1;
1133 }
1134
1135 return (flags & HANDLE_FLAG_INHERIT);
1136#else
1137 int flags;
1138
1139 flags = fcntl(fd, F_GETFD, 0);
1140 if (flags == -1) {
1141 if (raise)
1142 PyErr_SetFromErrno(PyExc_OSError);
1143 return -1;
1144 }
1145 return !(flags & FD_CLOEXEC);
1146#endif
1147}
1148
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;

    return get_inheritable(fd, raise);
}
1157
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001158
1159/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001160static int
1161set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1162{
1163#ifdef MS_WINDOWS
1164 HANDLE handle;
1165 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001166#else
1167#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1168 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001169 int request;
1170 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001171#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001172 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001173 int res;
1174#endif
1175
1176 /* atomic_flag_works can only be used to make the file descriptor
1177 non-inheritable */
1178 assert(!(atomic_flag_works != NULL && inheritable));
1179
1180 if (atomic_flag_works != NULL && !inheritable) {
1181 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001182 int isInheritable = get_inheritable(fd, raise);
1183 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001184 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001185 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001186 }
1187
1188 if (*atomic_flag_works)
1189 return 0;
1190 }
1191
1192#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001193 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001194 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001195 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001196 if (handle == INVALID_HANDLE_VALUE) {
1197 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001198 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001199 return -1;
1200 }
1201
1202 if (inheritable)
1203 flags = HANDLE_FLAG_INHERIT;
1204 else
1205 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001206
1207 /* This check can be removed once support for Windows 7 ends. */
1208#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1209 GetFileType(handle) == FILE_TYPE_CHAR)
1210
1211 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1212 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001213 if (raise)
1214 PyErr_SetFromWindowsErr(0);
1215 return -1;
1216 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001217#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001218 return 0;
1219
Victor Stinnerdaf45552013-08-28 00:53:59 +02001220#else
Victor Stinner282124b2014-09-02 11:41:04 +02001221
1222#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001223 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001224 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001225 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1226 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001227 if (inheritable)
1228 request = FIONCLEX;
1229 else
1230 request = FIOCLEX;
1231 err = ioctl(fd, request, NULL);
1232 if (!err) {
1233 ioctl_works = 1;
1234 return 0;
1235 }
1236
cptpcrd7dc71c42021-01-20 09:05:51 -05001237#ifdef __linux__
1238 if (errno == EBADF) {
1239 // On Linux, ioctl(FIOCLEX) will fail with EBADF for O_PATH file descriptors
1240 // Fall through to the fcntl() path
1241 }
1242 else
1243#endif
Victor Stinner3116cc42016-05-19 16:46:18 +02001244 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001245 if (raise)
1246 PyErr_SetFromErrno(PyExc_OSError);
1247 return -1;
1248 }
1249 else {
1250 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1251 device". The ioctl is declared but not supported by the kernel.
1252 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001253 Illumos-based OS for example.
1254
1255 Issue #27057: When SELinux policy disallows ioctl it will fail
1256 with EACCES. While FIOCLEX is safe operation it may be
1257 unavailable because ioctl was denied altogether.
1258 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001259 ioctl_works = 0;
1260 }
1261 /* fallback to fcntl() if ioctl() does not work */
1262 }
1263#endif
1264
1265 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001266 flags = fcntl(fd, F_GETFD);
1267 if (flags < 0) {
1268 if (raise)
1269 PyErr_SetFromErrno(PyExc_OSError);
1270 return -1;
1271 }
1272
Victor Stinnera858bbd2016-04-17 16:51:52 +02001273 if (inheritable) {
1274 new_flags = flags & ~FD_CLOEXEC;
1275 }
1276 else {
1277 new_flags = flags | FD_CLOEXEC;
1278 }
1279
1280 if (new_flags == flags) {
1281 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1282 return 0;
1283 }
1284
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001285 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001286 if (res < 0) {
1287 if (raise)
1288 PyErr_SetFromErrno(PyExc_OSError);
1289 return -1;
1290 }
1291 return 0;
1292#endif
1293}
1294
/* Make the file descriptor non-inheritable without raising an exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;

    return set_inheritable(fd, inheritable, raise, NULL);
}
1302
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, read the current inheritable flag of the
      file descriptor: if the descriptor is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set it to 0 and
      make the descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: set inheritable flag to False

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;

    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1324
/* Same as _Py_set_inheritable() but on error, set errno and
   don't raise an exception.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;

    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1333
Victor Stinnera555cfc2015-03-18 00:22:14 +01001334static int
1335_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001336{
1337 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001338 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001339#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001340 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001341#endif
1342
1343#ifdef MS_WINDOWS
1344 flags |= O_NOINHERIT;
1345#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001346 atomic_flag_works = &_Py_open_cloexec_works;
1347 flags |= O_CLOEXEC;
1348#else
1349 atomic_flag_works = NULL;
1350#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001351
Victor Stinnera555cfc2015-03-18 00:22:14 +01001352 if (gil_held) {
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001353 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1354 if (pathname_obj == NULL) {
1355 return -1;
1356 }
1357 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1358 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001359 return -1;
1360 }
1361
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001362 do {
1363 Py_BEGIN_ALLOW_THREADS
1364 fd = open(pathname, flags);
1365 Py_END_ALLOW_THREADS
1366 } while (fd < 0
1367 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001368 if (async_err) {
1369 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001370 return -1;
1371 }
Serhiy Storchaka6c6810d2020-06-24 08:46:05 +03001372 if (fd < 0) {
1373 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1374 Py_DECREF(pathname_obj);
1375 return -1;
1376 }
1377 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001378 }
1379 else {
1380 fd = open(pathname, flags);
1381 if (fd < 0)
1382 return -1;
1383 }
1384
1385#ifndef MS_WINDOWS
1386 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001387 close(fd);
1388 return -1;
1389 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001390#endif
1391
Victor Stinnerdaf45552013-08-28 00:53:59 +02001392 return fd;
1393}
1394
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1412
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1424
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   The "open" audit event is emitted first; NULL is returned if it fails.
   On non-Windows platforms, a mode that cannot be converted or does not fit
   in 9 bytes fails with errno set to EINVAL.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }

    FILE *fp;
#ifdef MS_WINDOWS
    fp = _wfopen(path, mode);
#else
    char cmode[10];
    size_t mode_len = wcstombs(cmode, mode, sizeof(cmode));
    /* mode_len >= sizeof(cmode): no room was left for the terminator */
    if (mode_len == (size_t)-1 || mode_len >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }

    char *cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        return NULL;
    }
    fp = fopen(cpath, cmode);
    PyMem_RawFree(cpath);
#endif
    if (fp == NULL) {
        return NULL;
    }

    if (make_non_inheritable(fileno(fp)) < 0) {
        fclose(fp);
        return NULL;
    }
    return fp;
}
1464
Victor Stinnerdaf45552013-08-28 00:53:59 +02001465
1466/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001467 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001468
Victor Stinnere42ccd22015-03-18 01:39:23 +01001469 Return the new file object on success. Raise an exception and return NULL
1470 on error.
1471
1472 The file descriptor is created non-inheritable.
1473
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001474 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1475 except if the Python signal handler raises an exception.
1476
Victor Stinner6f4fae82015-04-01 18:34:32 +02001477 Release the GIL to call _wfopen() or fopen(). The caller must hold
1478 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001479FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001480_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001481{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001482 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001483 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001484#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001485 wchar_t wmode[10];
1486 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001487
Victor Stinnere42ccd22015-03-18 01:39:23 +01001488 assert(PyGILState_Check());
1489
Steve Dowerb82e17e2019-05-23 08:45:22 -07001490 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1491 return NULL;
1492 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001493 if (!PyUnicode_Check(path)) {
1494 PyErr_Format(PyExc_TypeError,
1495 "str file path expected under Windows, got %R",
1496 Py_TYPE(path));
1497 return NULL;
1498 }
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001499#if USE_UNICODE_WCHAR_CACHE
1500 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1501#else /* USE_UNICODE_WCHAR_CACHE */
1502 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1503#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinneree587ea2011-11-17 00:51:38 +01001504 if (wpath == NULL)
1505 return NULL;
1506
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001507 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1508 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001509 if (usize == 0) {
1510 PyErr_SetFromWindowsErr(0);
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001511#if !USE_UNICODE_WCHAR_CACHE
1512 PyMem_Free(wpath);
1513#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001514 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001515 }
Victor Stinner4e314432010-10-07 21:45:39 +00001516
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001517 do {
1518 Py_BEGIN_ALLOW_THREADS
1519 f = _wfopen(wpath, wmode);
1520 Py_END_ALLOW_THREADS
1521 } while (f == NULL
1522 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Serhiy Storchaka4c8f09d2020-07-10 23:26:06 +03001523#if !USE_UNICODE_WCHAR_CACHE
1524 PyMem_Free(wpath);
1525#endif /* USE_UNICODE_WCHAR_CACHE */
Victor Stinner4e314432010-10-07 21:45:39 +00001526#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001527 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001528 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001529
1530 assert(PyGILState_Check());
1531
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001532 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001533 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001534 path_bytes = PyBytes_AS_STRING(bytes);
1535
Steve Dowerb82e17e2019-05-23 08:45:22 -07001536 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Christian Heimes96729122020-06-13 17:57:22 +02001537 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001538 return NULL;
1539 }
1540
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001541 do {
1542 Py_BEGIN_ALLOW_THREADS
1543 f = fopen(path_bytes, mode);
1544 Py_END_ALLOW_THREADS
1545 } while (f == NULL
1546 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001547
Victor Stinner4e314432010-10-07 21:45:39 +00001548 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001549#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001550 if (async_err)
1551 return NULL;
1552
Victor Stinnere42ccd22015-03-18 01:39:23 +01001553 if (f == NULL) {
1554 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001555 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001556 }
1557
1558 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001559 fclose(f);
1560 return NULL;
1561 }
1562 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001563}
1564
Victor Stinner66aab0c2015-03-19 22:53:20 +01001565/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001566
1567 On success, return the number of read bytes, it can be lower than count.
1568 If the current file offset is at or past the end of file, no bytes are read,
1569 and read() returns zero.
1570
1571 On error, raise an exception, set errno and return -1.
1572
1573 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1574 If the Python signal handler raises an exception, the function returns -1
1575 (the syscall is not retried).
1576
1577 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001578Py_ssize_t
1579_Py_read(int fd, void *buf, size_t count)
1580{
1581 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001582 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001583 int async_err = 0;
1584
Victor Stinner8a1be612016-03-14 22:07:55 +01001585 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001586
Victor Stinner66aab0c2015-03-19 22:53:20 +01001587 /* _Py_read() must not be called with an exception set, otherwise the
1588 * caller may think that read() was interrupted by a signal and the signal
1589 * handler raised an exception. */
1590 assert(!PyErr_Occurred());
1591
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001592 if (count > _PY_READ_MAX) {
1593 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001594 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001595
Steve Dower8fc89802015-04-12 00:26:27 -04001596 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001597 do {
1598 Py_BEGIN_ALLOW_THREADS
1599 errno = 0;
1600#ifdef MS_WINDOWS
1601 n = read(fd, buf, (int)count);
1602#else
1603 n = read(fd, buf, count);
1604#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001605 /* save/restore errno because PyErr_CheckSignals()
1606 * and PyErr_SetFromErrno() can modify it */
1607 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001608 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001609 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001610 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001611 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001612
1613 if (async_err) {
1614 /* read() was interrupted by a signal (failed with EINTR)
1615 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001616 errno = err;
1617 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001618 return -1;
1619 }
1620 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001621 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001622 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001623 return -1;
1624 }
1625
1626 return n;
1627}
1628
Victor Stinner82c3e452015-04-01 18:34:45 +02001629static Py_ssize_t
1630_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001631{
1632 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001633 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001634 int async_err = 0;
1635
Steve Dower8fc89802015-04-12 00:26:27 -04001636 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001637#ifdef MS_WINDOWS
1638 if (count > 32767 && isatty(fd)) {
1639 /* Issue #11395: the Windows console returns an error (12: not
1640 enough space error) on writing into stdout if stdout mode is
1641 binary and the length is greater than 66,000 bytes (or less,
1642 depending on heap usage). */
1643 count = 32767;
1644 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001645#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001646 if (count > _PY_WRITE_MAX) {
1647 count = _PY_WRITE_MAX;
1648 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001649
Victor Stinner82c3e452015-04-01 18:34:45 +02001650 if (gil_held) {
1651 do {
1652 Py_BEGIN_ALLOW_THREADS
1653 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001654#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001655 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001656#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001657 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001658#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001659 /* save/restore errno because PyErr_CheckSignals()
1660 * and PyErr_SetFromErrno() can modify it */
1661 err = errno;
1662 Py_END_ALLOW_THREADS
1663 } while (n < 0 && err == EINTR &&
1664 !(async_err = PyErr_CheckSignals()));
1665 }
1666 else {
1667 do {
1668 errno = 0;
1669#ifdef MS_WINDOWS
1670 n = write(fd, buf, (int)count);
1671#else
1672 n = write(fd, buf, count);
1673#endif
1674 err = errno;
1675 } while (n < 0 && err == EINTR);
1676 }
Steve Dower8fc89802015-04-12 00:26:27 -04001677 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001678
1679 if (async_err) {
1680 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001681 and the Python signal handler raised an exception (if gil_held is
1682 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001683 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001684 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001685 return -1;
1686 }
1687 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001688 if (gil_held)
1689 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001690 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001691 return -1;
1692 }
1693
1694 return n;
1695}
1696
Victor Stinner82c3e452015-04-01 18:34:45 +02001697/* Write count bytes of buf into fd.
1698
1699 On success, return the number of written bytes, it can be lower than count
1700 including 0. On error, raise an exception, set errno and return -1.
1701
1702 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1703 If the Python signal handler raises an exception, the function returns -1
1704 (the syscall is not retried).
1705
1706 Release the GIL to call write(). The caller must hold the GIL. */
1707Py_ssize_t
1708_Py_write(int fd, const void *buf, size_t count)
1709{
Victor Stinner8a1be612016-03-14 22:07:55 +01001710 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001711
Victor Stinner82c3e452015-04-01 18:34:45 +02001712 /* _Py_write() must not be called with an exception set, otherwise the
1713 * caller may think that write() was interrupted by a signal and the signal
1714 * handler raised an exception. */
1715 assert(!PyErr_Occurred());
1716
1717 return _Py_write_impl(fd, buf, count, 1);
1718}
1719
1720/* Write count bytes of buf into fd.
1721 *
1722 * On success, return the number of written bytes, it can be lower than count
1723 * including 0. On error, set errno and return -1.
1724 *
1725 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1726 * without calling the Python signal handler. */
1727Py_ssize_t
1728_Py_write_noraise(int fd, const void *buf, size_t count)
1729{
1730 return _Py_write_impl(fd, buf, count, 0);
1731}
1732
Victor Stinner4e314432010-10-07 21:45:39 +00001733#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +00001734
1735/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinner1be0d112019-03-18 17:47:26 +01001736 the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001737
Victor Stinner1be0d112019-03-18 17:47:26 +01001738 Return -1 on encoding error, on readlink() error, if the internal buffer is
1739 too short, on decoding error, or if 'buf' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001740int
Victor Stinner1be0d112019-03-18 17:47:26 +01001741_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001742{
1743 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001744 char cbuf[MAXPATHLEN];
Victor Stinner03a8a562019-10-04 02:22:39 +02001745 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
Victor Stinner3f711f42010-10-16 22:47:37 +00001746 wchar_t *wbuf;
Victor Stinner03a8a562019-10-04 02:22:39 +02001747 Py_ssize_t res;
Victor Stinner4e314432010-10-07 21:45:39 +00001748 size_t r1;
1749
Victor Stinner9dd76202017-12-21 16:20:32 +01001750 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001751 if (cpath == NULL) {
1752 errno = EINVAL;
1753 return -1;
1754 }
Victor Stinner03a8a562019-10-04 02:22:39 +02001755 res = readlink(cpath, cbuf, cbuf_len);
Victor Stinner9dd76202017-12-21 16:20:32 +01001756 PyMem_RawFree(cpath);
Victor Stinner03a8a562019-10-04 02:22:39 +02001757 if (res == -1) {
Victor Stinner4e314432010-10-07 21:45:39 +00001758 return -1;
Victor Stinner03a8a562019-10-04 02:22:39 +02001759 }
1760 if ((size_t)res == cbuf_len) {
Victor Stinner4e314432010-10-07 21:45:39 +00001761 errno = EINVAL;
1762 return -1;
1763 }
1764 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001765 wbuf = Py_DecodeLocale(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +00001766 if (wbuf == NULL) {
1767 errno = EINVAL;
1768 return -1;
1769 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001770 /* wbuf must have space to store the trailing NUL character */
1771 if (buflen <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001772 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001773 errno = EINVAL;
1774 return -1;
1775 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001776 wcsncpy(buf, wbuf, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001777 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001778 return (int)r1;
1779}
1780#endif
1781
1782#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001783
1784/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001785 encoding, decode the result from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001786
Victor Stinner1be0d112019-03-18 17:47:26 +01001787 Return NULL on encoding error, realpath() error, decoding error
1788 or if 'resolved_path' is too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001789wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001790_Py_wrealpath(const wchar_t *path,
Victor Stinner1be0d112019-03-18 17:47:26 +01001791 wchar_t *resolved_path, size_t resolved_path_len)
Victor Stinner4e314432010-10-07 21:45:39 +00001792{
1793 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001794 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001795 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001796 char *res;
1797 size_t r;
Victor Stinner9dd76202017-12-21 16:20:32 +01001798 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001799 if (cpath == NULL) {
1800 errno = EINVAL;
1801 return NULL;
1802 }
1803 res = realpath(cpath, cresolved_path);
Victor Stinner9dd76202017-12-21 16:20:32 +01001804 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001805 if (res == NULL)
1806 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001807
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001808 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001809 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001810 errno = EINVAL;
1811 return NULL;
1812 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001813 /* wresolved_path must have space to store the trailing NUL character */
1814 if (resolved_path_len <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001815 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001816 errno = EINVAL;
1817 return NULL;
1818 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001819 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001820 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001821 return resolved_path;
1822}
1823#endif
1824
Victor Stinner3939c322019-06-25 15:02:43 +02001825
1826#ifndef MS_WINDOWS
1827int
1828_Py_isabs(const wchar_t *path)
1829{
1830 return (path[0] == SEP);
1831}
1832#endif
1833
1834
1835/* Get an absolute path.
1836 On error (ex: fail to get the current directory), return -1.
1837 On memory allocation failure, set *abspath_p to NULL and return 0.
1838 On success, return a newly allocated to *abspath_p to and return 0.
1839 The string must be freed by PyMem_RawFree(). */
1840int
1841_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1842{
1843#ifdef MS_WINDOWS
1844 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1845 DWORD result;
1846
1847 result = GetFullPathNameW(path,
1848 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1849 NULL);
1850 if (!result) {
1851 return -1;
1852 }
1853
1854 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1855 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1856 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1857 }
1858 else {
1859 woutbufp = NULL;
1860 }
1861 if (!woutbufp) {
1862 *abspath_p = NULL;
1863 return 0;
1864 }
1865
1866 result = GetFullPathNameW(path, result, woutbufp, NULL);
1867 if (!result) {
1868 PyMem_RawFree(woutbufp);
1869 return -1;
1870 }
1871 }
1872
1873 if (woutbufp != woutbuf) {
1874 *abspath_p = woutbufp;
1875 return 0;
1876 }
1877
1878 *abspath_p = _PyMem_RawWcsdup(woutbufp);
1879 return 0;
1880#else
1881 if (_Py_isabs(path)) {
1882 *abspath_p = _PyMem_RawWcsdup(path);
1883 return 0;
1884 }
1885
1886 wchar_t cwd[MAXPATHLEN + 1];
1887 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
1888 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
1889 /* unable to get the current directory */
1890 return -1;
1891 }
1892
1893 size_t cwd_len = wcslen(cwd);
1894 size_t path_len = wcslen(path);
1895 size_t len = cwd_len + 1 + path_len + 1;
1896 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1897 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
1898 }
1899 else {
1900 *abspath_p = NULL;
1901 }
1902 if (*abspath_p == NULL) {
1903 return 0;
1904 }
1905
1906 wchar_t *abspath = *abspath_p;
1907 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
1908 abspath += cwd_len;
1909
1910 *abspath = (wchar_t)SEP;
1911 abspath++;
1912
1913 memcpy(abspath, path, path_len * sizeof(wchar_t));
1914 abspath += path_len;
1915
1916 *abspath = 0;
1917 return 0;
1918#endif
1919}
1920
1921
Victor Stinnerfaddaed2019-03-19 02:58:14 +01001922/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001923 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001924
Victor Stinner1be0d112019-03-18 17:47:26 +01001925 Return NULL on getcwd() error, on decoding error, or if 'buf' is
1926 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001927wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01001928_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001929{
1930#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01001931 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1932 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00001933#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001934 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001935 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001936 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001937
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001938 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001939 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001940 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001941 if (wname == NULL)
1942 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01001943 /* wname must have space to store the trailing NUL character */
1944 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001945 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001946 return NULL;
1947 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001948 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001949 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001950 return buf;
1951#endif
1952}
1953
Victor Stinnerdaf45552013-08-28 00:53:59 +02001954/* Duplicate a file descriptor. The new file descriptor is created as
1955 non-inheritable. Return a new file descriptor on success, raise an OSError
1956 exception and return -1 on error.
1957
1958 The GIL is released to call dup(). The caller must hold the GIL. */
1959int
1960_Py_dup(int fd)
1961{
1962#ifdef MS_WINDOWS
1963 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001964#endif
1965
Victor Stinner8a1be612016-03-14 22:07:55 +01001966 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001967
Victor Stinnerdaf45552013-08-28 00:53:59 +02001968#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001969 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001970 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001971 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001972 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001973 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001974 return -1;
1975 }
1976
Victor Stinnerdaf45552013-08-28 00:53:59 +02001977 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001978 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001979 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001980 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001981 Py_END_ALLOW_THREADS
1982 if (fd < 0) {
1983 PyErr_SetFromErrno(PyExc_OSError);
1984 return -1;
1985 }
1986
Zackery Spytz28fca0c2019-06-17 01:17:14 -06001987 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1988 _Py_BEGIN_SUPPRESS_IPH
1989 close(fd);
1990 _Py_END_SUPPRESS_IPH
1991 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001992 }
1993#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1994 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001995 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001996 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001997 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001998 Py_END_ALLOW_THREADS
1999 if (fd < 0) {
2000 PyErr_SetFromErrno(PyExc_OSError);
2001 return -1;
2002 }
2003
2004#else
2005 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04002006 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002007 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002008 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002009 Py_END_ALLOW_THREADS
2010 if (fd < 0) {
2011 PyErr_SetFromErrno(PyExc_OSError);
2012 return -1;
2013 }
2014
2015 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04002016 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002017 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04002018 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02002019 return -1;
2020 }
2021#endif
2022 return fd;
2023}
2024
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002025#ifndef MS_WINDOWS
2026/* Get the blocking mode of the file descriptor.
2027 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2028 raise an exception and return -1 on error. */
2029int
2030_Py_get_blocking(int fd)
2031{
Steve Dower8fc89802015-04-12 00:26:27 -04002032 int flags;
2033 _Py_BEGIN_SUPPRESS_IPH
2034 flags = fcntl(fd, F_GETFL, 0);
2035 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002036 if (flags < 0) {
2037 PyErr_SetFromErrno(PyExc_OSError);
2038 return -1;
2039 }
2040
2041 return !(flags & O_NONBLOCK);
2042}
2043
2044/* Set the blocking mode of the specified file descriptor.
2045
2046 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2047 otherwise.
2048
2049 Return 0 on success, raise an exception and return -1 on error. */
2050int
2051_Py_set_blocking(int fd, int blocking)
2052{
pxinwr06afac62020-12-08 04:41:12 +08002053/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2054 Use fcntl() instead. */
2055#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002056 int arg = !blocking;
2057 if (ioctl(fd, FIONBIO, &arg) < 0)
2058 goto error;
2059#else
2060 int flags, res;
2061
Steve Dower8fc89802015-04-12 00:26:27 -04002062 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002063 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002064 if (flags >= 0) {
2065 if (blocking)
2066 flags = flags & (~O_NONBLOCK);
2067 else
2068 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002069
Steve Dower8fc89802015-04-12 00:26:27 -04002070 res = fcntl(fd, F_SETFL, flags);
2071 } else {
2072 res = -1;
2073 }
2074 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002075
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002076 if (res < 0)
2077 goto error;
2078#endif
2079 return 0;
2080
2081error:
2082 PyErr_SetFromErrno(PyExc_OSError);
2083 return -1;
2084}
2085#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002086
2087
2088int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002089_Py_GetLocaleconvNumeric(struct lconv *lc,
2090 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002091{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002092 assert(decimal_point != NULL);
2093 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002094
TIGirardif2312032020-10-20 08:39:52 -03002095#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002096 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002097 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002098 change_locale = 1;
2099 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002100 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002101 change_locale = 1;
2102 }
2103
2104 /* Keep a copy of the LC_CTYPE locale */
2105 char *oldloc = NULL, *loc = NULL;
2106 if (change_locale) {
2107 oldloc = setlocale(LC_CTYPE, NULL);
2108 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002109 PyErr_SetString(PyExc_RuntimeWarning,
2110 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002111 return -1;
2112 }
2113
2114 oldloc = _PyMem_Strdup(oldloc);
2115 if (!oldloc) {
2116 PyErr_NoMemory();
2117 return -1;
2118 }
2119
2120 loc = setlocale(LC_NUMERIC, NULL);
2121 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2122 loc = NULL;
2123 }
2124
2125 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002126 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002127 if LC_NUMERIC locale is different than LC_CTYPE locale and
2128 decimal_point and/or thousands_sep are non-ASCII or longer than
2129 1 byte */
2130 setlocale(LC_CTYPE, loc);
2131 }
2132 }
2133
TIGirardif2312032020-10-20 08:39:52 -03002134#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2135#else /* MS_WINDOWS */
2136/* Use _W_* fields of Windows strcut lconv */
2137#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2138#endif /* MS_WINDOWS */
2139
Victor Stinner02e6bf72018-11-20 16:20:16 +01002140 int res = -1;
2141
TIGirardif2312032020-10-20 08:39:52 -03002142 *decimal_point = GET_LOCALE_STRING(decimal_point);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002143 if (*decimal_point == NULL) {
2144 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002145 }
2146
TIGirardif2312032020-10-20 08:39:52 -03002147 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
Victor Stinner02e6bf72018-11-20 16:20:16 +01002148 if (*thousands_sep == NULL) {
2149 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002150 }
2151
2152 res = 0;
2153
Victor Stinner02e6bf72018-11-20 16:20:16 +01002154done:
TIGirardif2312032020-10-20 08:39:52 -03002155#ifndef MS_WINDOWS
Victor Stinnercb064fc2018-01-15 15:58:02 +01002156 if (loc != NULL) {
2157 setlocale(LC_CTYPE, oldloc);
2158 }
2159 PyMem_Free(oldloc);
TIGirardif2312032020-10-20 08:39:52 -03002160#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002161 return res;
TIGirardif2312032020-10-20 08:39:52 -03002162
2163#undef GET_LOCALE_STRING
Victor Stinnercb064fc2018-01-15 15:58:02 +01002164}
Kyle Evans79925792020-10-13 15:04:44 -05002165
2166/* Our selection logic for which function to use is as follows:
2167 * 1. If close_range(2) is available, always prefer that; it's better for
2168 * contiguous ranges like this than fdwalk(3) which entails iterating over
2169 * the entire fd space and simply doing nothing for those outside the range.
2170 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2171 * closing up to sysconf(_SC_OPEN_MAX).
2172 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2173 * as that will be more performant if the range happens to have any chunk of
2174 * non-opened fd in the middle.
2175 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2176 */
2177#ifdef __FreeBSD__
2178# define USE_CLOSEFROM
2179#endif /* __FreeBSD__ */
2180
2181#ifdef HAVE_FDWALK
2182# define USE_FDWALK
2183#endif /* HAVE_FDWALK */
2184
2185#ifdef USE_FDWALK
2186static int
2187_fdwalk_close_func(void *lohi, int fd)
2188{
2189 int lo = ((int *)lohi)[0];
2190 int hi = ((int *)lohi)[1];
2191
2192 if (fd >= hi) {
2193 return 1;
2194 }
2195 else if (fd >= lo) {
2196 /* Ignore errors */
2197 (void)close(fd);
2198 }
2199 return 0;
2200}
2201#endif /* USE_FDWALK */
2202
2203/* Closes all file descriptors in [first, last], ignoring errors. */
2204void
2205_Py_closerange(int first, int last)
2206{
2207 first = Py_MAX(first, 0);
2208 _Py_BEGIN_SUPPRESS_IPH
2209#ifdef HAVE_CLOSE_RANGE
2210 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2211 /* Any errors encountered while closing file descriptors are ignored;
2212 * ENOSYS means no kernel support, though,
2213 * so we'll fallback to the other methods. */
2214 }
2215 else
2216#endif /* HAVE_CLOSE_RANGE */
2217#ifdef USE_CLOSEFROM
2218 if (last >= sysconf(_SC_OPEN_MAX)) {
2219 /* Any errors encountered while closing file descriptors are ignored */
2220 closefrom(first);
2221 }
2222 else
2223#endif /* USE_CLOSEFROM */
2224#ifdef USE_FDWALK
2225 {
2226 int lohi[2];
2227 lohi[0] = first;
2228 lohi[1] = last + 1;
2229 fdwalk(_fdwalk_close_func, lohi);
2230 }
2231#else
2232 {
2233 for (int i = first; i <= last; i++) {
2234 /* Ignore errors */
2235 (void)close(i);
2236 }
2237 }
2238#endif /* USE_FDWALK */
2239 _Py_END_SUPPRESS_IPH
2240}